@inproceedings{Ye_CVPR_2025,
  author    = {Ye, Jinhui and Wang, Zihan and Sun, Haosen and Chandrasegaran, Keshigeyan and Durante, Zane and Eyzaguirre, Cristobal and Bisk, Yonatan and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li and Wu, Jiajun and Li, Manling},
  title     = {Re-thinking Temporal Search for Long-Form Video Understanding},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Nashville, Tennessee},
  month     = jun,
  year      = {2025},
}
@inproceedings{Eyzaguirre_NeurIPS_2024,
  author    = {Eyzaguirre, Cristobal and Tang, Eric and Buch, Shyamal and Gaidon, Adrien and Wu, Jiajun and Niebles, Juan Carlos},
  title     = {Streaming Detection of Queried Event Start},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track},
  address   = {Vancouver, Canada},
  month     = dec,
  year      = {2024},
}
APIGen: Automated PIpeline for Generating Verifiable and Diverse Function-Calling Datasets
Zuxin Liu, Thai Quoc Hoang, Jianguo Zhang, Ming Zhu, Tian Lan, Shirley Kokane, Juntao Tan, Weiran Yao, Zhiwei Liu, Yihao Feng, and 7 more authors
In Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track. Vancouver, Canada. Dec 2024
% "PIpeline" is intentionally mixed-case (A-PI-Gen); braced so sentence-casing styles keep it.
@inproceedings{Zuxin_NeurIPS_2024,
  author    = {Liu, Zuxin and Hoang, Thai Quoc and Zhang, Jianguo and Zhu, Ming and Lan, Tian and Kokane, Shirley and Tan, Juntao and Yao, Weiran and Liu, Zhiwei and Feng, Yihao and Murthy, Rithesh and Yang, Liangwei and Savarese, Silvio and Niebles, Juan Carlos and Wang, Huan and Heinecke, Shelby and Xiong, Caiming},
  title     = {{APIGen}: Automated {PIpeline} for Generating Verifiable and Diverse Function-Calling Datasets},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track},
  address   = {Vancouver, Canada},
  month     = dec,
  year      = {2024},
}
IKEA Manuals at Work: 4D Grounding of Assembly Instructions on Internet Videos
Yunong Liu, Weiyu Liu, Shubh Khanna, Cristobal Eyzaguirre, Manling Li, Juan Carlos Niebles, Vineeth Ravi, Saumitra Mishra, and Jiajun Wu
In Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track. Vancouver, Canada. Dec 2024
% "4D" is braced so sentence-casing styles do not render it as "4d".
@inproceedings{Yunong_NeurIPS_2024,
  author    = {Liu, Yunong and Liu, Weiyu and Khanna, Shubh and Eyzaguirre, Cristobal and Li, Manling and Niebles, Juan Carlos and Ravi, Vineeth and Mishra, Saumitra and Wu, Jiajun},
  title     = {{IKEA} Manuals at Work: {4D} Grounding of Assembly Instructions on Internet Videos},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track},
  address   = {Vancouver, Canada},
  month     = dec,
  year      = {2024},
}
X-InstructBLIP: A Framework for Aligning Image, 3D, Audio, Video to LLMs and its Emergent Cross-modal Reasoning
Artemis Panagopoulou, Le Xue, Ning Yu, Junnan Li, Dongxu Li, Shafiq Joty, Ran Xu, Silvio Savarese, Caiming Xiong, and Juan Carlos Niebles
In European Conference on Computer Vision (ECCV). Milan, Italy. Oct 2024
% "3D" and "LLMs" are braced as whole words so sentence-casing styles keep their capitals.
@inproceedings{ArtemisECCV2024,
  author    = {Panagopoulou, Artemis and Xue, Le and Yu, Ning and Li, Junnan and Li, Dongxu and Joty, Shafiq and Xu, Ran and Savarese, Silvio and Xiong, Caiming and Niebles, Juan Carlos},
  title     = {{X-InstructBLIP}: A Framework for Aligning Image, {3D}, Audio, Video to {LLMs} and its Emergent Cross-modal Reasoning},
  booktitle = {European Conference on Computer Vision (ECCV)},
  address   = {Milan, Italy},
  month     = oct,
  year      = {2024},
  doi       = {10.1007/978-3-031-72995-9_11},
}
LayoutDETR: Detection Transformer Is a Good Multimodal Layout Designer
Ning Yu, Chia-Chih Chen, Zeyuan Chen, Rui Meng, Gang Wu, Paul Josel, Juan Carlos Niebles, Caiming Xiong, and Ran Xu
In European Conference on Computer Vision (ECCV). Milan, Italy. Oct 2024
@inproceedings{YuECCV2024,
  author    = {Yu, Ning and Chen, Chia-Chih and Chen, Zeyuan and Meng, Rui and Wu, Gang and Josel, Paul and Niebles, Juan Carlos and Xiong, Caiming and Xu, Ran},
  title     = {{LayoutDETR}: Detection Transformer Is a Good Multimodal Layout Designer},
  booktitle = {European Conference on Computer Vision (ECCV)},
  address   = {Milan, Italy},
  month     = oct,
  year      = {2024},
  doi       = {10.1007/978-3-031-72661-3_10},
}
ULIP-2: Towards Scalable Multimodal Pre-training for 3D Understanding
Le Xue, Ning Yu, Shu Zhang, Artemis Panagopoulou, Junnan Li, Roberto Martín-Martín, Jiajun Wu, Caiming Xiong, Ran Xu, Juan Carlos Niebles, and 1 more author
In IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Seattle, Washington. Jun 2024
@inproceedings{Xue_CVPR_2024,
  author    = {Xue, Le and Yu, Ning and Zhang, Shu and Panagopoulou, Artemis and Li, Junnan and Martín-Martín, Roberto and Wu, Jiajun and Xiong, Caiming and Xu, Ran and Niebles, Juan Carlos and Savarese, Silvio},
  title     = {{ULIP-2}: Towards Scalable Multimodal Pre-training for {3D} Understanding},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Seattle, Washington},
  month     = jun,
  year      = {2024},
  doi       = {10.1109/CVPR52733.2024.02558},
}
2023
ULIP: Learning Unified Representation of Language, Image and Point Cloud for 3D Understanding
Le Xue, Mingfei Gao, Chen Xing, Roberto Martín-Martín, Jiajun Wu, Caiming Xiong, Ran Xu, Juan Carlos Niebles, and Silvio Savarese
In IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Vancouver, Canada. Jun 2023
% "3D" is braced so sentence-casing styles do not render it as "3d".
@inproceedings{Xue_CVPR_2023,
  author    = {Xue, Le and Gao, Mingfei and Xing, Chen and Martín-Martín, Roberto and Wu, Jiajun and Xiong, Caiming and Xu, Ran and Niebles, Juan Carlos and Savarese, Silvio},
  title     = {{ULIP}: Learning Unified Representation of Language, Image and Point Cloud for {3D} Understanding},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Vancouver, Canada},
  month     = jun,
  year      = {2023},
  doi       = {10.1109/CVPR52729.2023.00120},
}
2022
MOMA-LRG: Language-Refined Graphs for Multi-Object Multi-Actor Activity Parsing
Zelun Luo, Zane Durante, Linden Li, Wanze Xie, Ruochen Liu, Emily Jin, Zhuoyi Huang, Lun Yu Li, Jiajun Wu, Juan Carlos Niebles, and 2 more authors
In Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track. New Orleans, USA. Dec 2022
@inproceedings{Luo_NeurIPS_2022,
  author    = {Luo, Zelun and Durante, Zane and Li, Linden and Xie, Wanze and Liu, Ruochen and Jin, Emily and Huang, Zhuoyi and Li, Lun Yu and Wu, Jiajun and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li},
  title     = {{MOMA-LRG}: Language-Refined Graphs for Multi-Object Multi-Actor Activity Parsing},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS), Datasets and Benchmarks Track},
  address   = {New Orleans, USA},
  month     = dec,
  year      = {2022},
}
2020
Action Genome: Actions as Compositions of Spatio-temporal Scene Graphs
Jingwei Ji, Ranjay Krishna, Li Fei-Fei, and Juan Carlos Niebles
In IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Seattle, Washington, USA. Jun 2020
@inproceedings{Ji_CVPR_2020,
  author    = {Ji, Jingwei and Krishna, Ranjay and Fei-Fei, Li and Niebles, Juan Carlos},
  title     = {Action Genome: Actions as Compositions of Spatio-temporal Scene Graphs},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Seattle, Washington, USA},
  month     = jun,
  year      = {2020},
  doi       = {10.1109/CVPR42600.2020.01025},
}
Few-Shot Video Classification via Temporal Alignment
Kaidi Cao, Jingwei Ji, Zhangjie Cao, Chien-Yi Chang, and Juan Carlos Niebles
In IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Seattle, Washington, USA. Jun 2020
@inproceedings{Cao_CVPR_2020,
  author    = {Cao, Kaidi and Ji, Jingwei and Cao, Zhangjie and Chang, Chien-Yi and Niebles, Juan Carlos},
  title     = {Few-Shot Video Classification via Temporal Alignment},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Seattle, Washington, USA},
  month     = jun,
  year      = {2020},
  doi       = {10.1109/CVPR42600.2020.01063},
}
Spatiotemporal Relationship Reasoning for Pedestrian Intent Prediction
Bingbin Liu, Ehsan Adeli, Zhangjie Cao, Kuan-Hui Lee, Abhijeet Shenoi, Adrien Gaidon, and Juan Carlos Niebles
IEEE Robotics and Automation Letters (RA-L) and IEEE International Conference on Robotics and Automation (ICRA). Paris, France. May 2020
@article{Liu_RAL_2020,
  author  = {Liu, Bingbin and Adeli, Ehsan and Cao, Zhangjie and Lee, Kuan-Hui and Shenoi, Abhijeet and Gaidon, Adrien and Niebles, Juan Carlos},
  title   = {Spatiotemporal Relationship Reasoning for Pedestrian Intent Prediction},
  journal = {IEEE Robotics and Automation Letters (RA-L) and IEEE International Conference on Robotics and Automation (ICRA)},
  volume  = {5},
  number  = {2},
  address = {Paris, France},
  month   = may,
  year    = {2020},
  doi     = {10.1109/LRA.2020.2976305},
}
2018
Translating Navigation Instructions in Natural Language to a High-Level Plan for Behavioral Robot Navigation
Xiaoxue Zang, Ashwini Pokle, Marynel Vázquez, Kevin Chen, Juan Carlos Niebles, Alvaro Soto, and Silvio Savarese
In Conference on Empirical Methods in Natural Language Processing (EMNLP). Brussels, Belgium. Nov 2018
@inproceedings{Zang_EMNLP_2018,
  author    = {Zang, Xiaoxue and Pokle, Ashwini and Vázquez, Marynel and Chen, Kevin and Niebles, Juan Carlos and Soto, Alvaro and Savarese, Silvio},
  title     = {Translating Navigation Instructions in Natural Language to a High-Level Plan for Behavioral Robot Navigation},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  address   = {Brussels, Belgium},
  month     = nov,
  year      = {2018},
  doi       = {10.18653/v1/D18-1286},
}
2017
Dense-Captioning Events in Videos
Ranjay Krishna, Kenji Hata, Frederic Ren, Li Fei-Fei, and Juan Carlos Niebles
In IEEE International Conference on Computer Vision (ICCV). Venice, Italy. Oct 2017
@inproceedings{Krishna_ICCV_2017,
  author    = {Krishna, Ranjay and Hata, Kenji and Ren, Frederic and Fei-Fei, Li and Niebles, Juan Carlos},
  title     = {Dense-Captioning Events in Videos},
  booktitle = {IEEE International Conference on Computer Vision (ICCV)},
  address   = {Venice, Italy},
  month     = oct,
  year      = {2017},
  doi       = {10.1109/ICCV.2017.83},
}
Spotlight
Agent-centric Risk Assessment: Accident Anticipation and Risky Region Localization
Kuo-Hao Zeng, Shih-Han Chou, Fu-Hsiang Chan, Juan Carlos Niebles, and Min Sun
In IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Honolulu, Hawaii, USA. Jun 2017
@inproceedings{Zeng_CVPR_2017,
  author    = {Zeng, Kuo-Hao and Chou, Shih-Han and Chan, Fu-Hsiang and Niebles, Juan Carlos and Sun, Min},
  title     = {Agent-centric Risk Assessment: Accident Anticipation and Risky Region Localization},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Honolulu, Hawaii, USA},
  month     = jun,
  year      = {2017},
  doi       = {10.1109/CVPR.2017.146},
}
Sparse composition of body poses and atomic actions for human activity recognition in RGB-D videos
@article{Lillo_IVC_2017,
  author  = {Lillo, Ivan and Niebles, Juan Carlos and Soto, Alvaro},
  title   = {Sparse composition of body poses and atomic actions for human activity recognition in {RGB-D} videos},
  journal = {Image and Vision Computing},
  volume  = {59},
  month   = mar,
  year    = {2017},
  doi     = {10.1016/j.imavis.2016.11.004},
}
Leveraging Video Descriptions to Learn Video Question Answering
Kuo-Hao Zeng, Tseng-Hung Chen, Ching-Yao Chuang, Yuan-Hong Liao, Juan Carlos Niebles, and Min Sun
In AAAI Conference on Artificial Intelligence (AAAI). San Francisco, USA. Feb 2017
@inproceedings{Zeng_AAAI_2017,
  author    = {Zeng, Kuo-Hao and Chen, Tseng-Hung and Chuang, Ching-Yao and Liao, Yuan-Hong and Niebles, Juan Carlos and Sun, Min},
  title     = {Leveraging Video Descriptions to Learn Video Question Answering},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  address   = {San Francisco, USA},
  month     = feb,
  year      = {2017},
  doi       = {10.1609/aaai.v31i1.11238},
}
2016
Title Generation for User Generated Videos
Kuo-Hao Zeng, Tseng-Hung Chen, Juan Carlos Niebles, and Min Sun
In European Conference on Computer Vision (ECCV). Amsterdam, Netherlands. Oct 2016
@inproceedings{Zeng_ECCV_2016,
  author    = {Zeng, Kuo-Hao and Chen, Tseng-Hung and Niebles, Juan Carlos and Sun, Min},
  title     = {Title Generation for User Generated Videos},
  booktitle = {European Conference on Computer Vision (ECCV)},
  address   = {Amsterdam, Netherlands},
  month     = oct,
  year      = {2016},
  doi       = {10.1007/978-3-319-46475-6_38},
}
A Hierarchical Pose-Based Approach to Complex Action Understanding Using Dictionaries of Actionlets and Motion Poselets
Ivan Lillo, Juan Carlos Niebles, and Alvaro Soto
In IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Las Vegas, USA. Jun 2016
@inproceedings{Lillo_CVPR_2016,
  author    = {Lillo, Ivan and Niebles, Juan Carlos and Soto, Alvaro},
  title     = {A Hierarchical Pose-Based Approach to Complex Action Understanding Using Dictionaries of Actionlets and Motion Poselets},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Las Vegas, USA},
  month     = jun,
  year      = {2016},
  doi       = {10.1109/CVPR.2016.218},
}
2015
ActivityNet: A Large-Scale Video Benchmark for Human Activity Understanding
@inproceedings{Caba_CVPR_2015,
  author    = {Caba Heilbron, Fabian and Escorcia, Victor and Ghanem, Bernard and Niebles, Juan Carlos},
  title     = {{ActivityNet}: A Large-Scale Video Benchmark for Human Activity Understanding},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Boston, USA},
  month     = jun,
  year      = {2015},
  doi       = {10.1109/CVPR.2015.7298698},
}
2014
Discriminative Hierarchical Modeling of Spatio-Temporally Composable Human Activities
Ivan Lillo, Alvaro Soto, and Juan Carlos Niebles
In IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Columbus, USA. Jun 2014
@inproceedings{Lillo_CVPR_2014,
  author    = {Lillo, Ivan and Soto, Alvaro and Niebles, Juan Carlos},
  title     = {Discriminative Hierarchical Modeling of Spatio-Temporally Composable Human Activities},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  address   = {Columbus, USA},
  month     = jun,
  year      = {2014},
  doi       = {10.1109/CVPR.2014.109},
}
2010
Oral
Modeling Temporal Structure of Decomposable Motion Segments for Activity Classification
Juan Carlos Niebles, Chih-Wei Chen, and Li Fei-Fei
In European Conference on Computer Vision (ECCV). Hersonissos, Crete, Greece. Sep 2010
@inproceedings{Niebles_ECCV_2010,
  author    = {Niebles, Juan Carlos and Chen, Chih-Wei and Fei-Fei, Li},
  title     = {Modeling Temporal Structure of Decomposable Motion Segments for Activity Classification},
  booktitle = {European Conference on Computer Vision (ECCV)},
  address   = {Hersonissos, Crete, Greece},
  month     = sep,
  year      = {2010},
  doi       = {10.1007/978-3-642-15552-9_29},
}
2008
Extracting Moving People from Internet Videos
Juan Carlos Niebles, Bohyung Han, Andras Ferencz, and Li Fei-Fei
In European Conference on Computer Vision (ECCV). Marseilles, France. Oct 2008
@inproceedings{Niebles_ECCV_2008,
  author    = {Niebles, Juan Carlos and Han, Bohyung and Ferencz, Andras and Fei-Fei, Li},
  title     = {Extracting Moving People from Internet Videos},
  booktitle = {European Conference on Computer Vision (ECCV)},
  address   = {Marseilles, France},
  month     = oct,
  year      = {2008},
  doi       = {10.1007/978-3-540-88693-8_39},
}