% BibTeX bibliography database
|
|
@InProceedings{ Ahmed.Aly.Gonzalez.ea.2012,
  title        = {Scalable inference in latent variable models},
  author       = {Ahmed, Amr and Aly, Mohamed and Gonzalez, Joseph and
                  Narayanamurthy, Shravan and Smola, Alexander J},
  booktitle    = {Proceedings of the Fifth {ACM} International Conference on
                  Web Search and Data Mining},
  pages        = {123--132},
  year         = {2012},
  organization = {ACM}
}
|
|
|
|
@Article{ Aji.McEliece.2000,
  title     = {The generalized distributive law},
  author    = {Aji, Srinivas M and McEliece, Robert J},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {46},
  number    = {2},
  pages     = {325--343},
  year      = {2000},
  publisher = {IEEE}
}
|
|
|
|
@Article{ Ba.Kiros.Hinton.2016,
  title   = {Layer normalization},
  author  = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
  journal = {arXiv preprint arXiv:1607.06450},
  year    = {2016}
}
|
|
|
|
@Article{ Bahdanau.Cho.Bengio.2014,
  title   = {Neural machine translation by jointly learning to align
             and translate},
  author  = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1409.0473},
  year    = {2014}
}
|
|
|
|
@InProceedings{ Bay.Tuytelaars.Van-Gool.2006,
  title     = {{SURF}: Speeded up robust features},
  author    = {Bay, Herbert and Tuytelaars, Tinne and Van Gool, Luc},
  booktitle = {European Conference on Computer Vision},
  pages     = {404--417},
  year      = {2006},
  publisher = {Springer}
}
|
|
|
|
@Article{ Bengio.Ducharme.Vincent.ea.2003,
  title   = {A neural probabilistic language model},
  author  = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent,
             Pascal and Jauvin, Christian},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  month   = feb,
  pages   = {1137--1155},
  year    = {2003}
}
|
|
|
|
@Article{ Bishop.1995,
  title     = {Training with noise is equivalent to {Tikhonov}
               regularization},
  author    = {Bishop, Chris M},
  journal   = {Neural Computation},
  volume    = {7},
  number    = {1},
  pages     = {108--116},
  year      = {1995},
  publisher = {MIT Press}
}
|
|
|
|
@Book{ Bishop.2006,
  title     = {Pattern recognition and machine learning},
  author    = {Bishop, Christopher M},
  year      = {2006},
  publisher = {Springer}
}
|
|
|
|
@InProceedings{ Bodla.Singh.Chellappa.ea.2017,
  title     = {{Soft-NMS} -- improving object detection with one line of
               code},
  author    = {Bodla, Navaneeth and Singh, Bharat and Chellappa, Rama and
               Davis, Larry S},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {5561--5569},
  year      = {2017}
}
|
|
|
|
@Article{ Bojanowski.Grave.Joulin.ea.2017,
  title     = {Enriching word vectors with subword information},
  author    = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand
               and Mikolov, Tomas},
  journal   = {Transactions of the Association for Computational
               Linguistics},
  volume    = {5},
  pages     = {135--146},
  year      = {2017},
  publisher = {MIT Press}
}
|
|
|
|
@Book{ Bollobas.1999,
  title     = {Linear analysis},
  author    = {Bollob{\'a}s, B{\'e}la},
  year      = {1999},
  publisher = {Cambridge University Press},
  address   = {Cambridge}
}
|
|
|
|
@Article{ Bowman.Angeli.Potts.ea.2015,
  title   = {A large annotated corpus for learning natural language
             inference},
  author  = {Bowman, Samuel R and Angeli, Gabor and Potts, Christopher
             and Manning, Christopher D},
  journal = {arXiv preprint arXiv:1508.05326},
  year    = {2015}
}
|
|
|
|
@Book{ Boyd.Vandenberghe.2004,
  address   = {Cambridge, England},
  author    = {Boyd, Stephen and Vandenberghe, Lieven},
  publisher = {Cambridge University Press},
  title     = {Convex Optimization},
  year      = {2004}
}
|
|
|
|
@InProceedings{ Brown.Cocke.Della-Pietra.ea.1988,
  title     = {A statistical approach to language translation},
  author    = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A
               and Della Pietra, Vincent J and Jelinek, Frederick and
               Mercer, Robert L and Roossin, Paul},
  booktitle = {Coling Budapest 1988 Volume 1: International Conference on
               Computational Linguistics},
  year      = {1988}
}
|
|
|
|
@Article{ Brown.Cocke.Della-Pietra.ea.1990,
  title   = {A statistical approach to machine translation},
  author  = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A
             and Della Pietra, Vincent J and Jelinek, Frederick and
             Lafferty, John and Mercer, Robert L and Roossin, Paul S},
  journal = {Computational Linguistics},
  volume  = {16},
  number  = {2},
  pages   = {79--85},
  year    = {1990}
}
|
|
|
|
@InProceedings{ Brown.Sandholm.2017,
  title     = {{Libratus}: The Superhuman {AI} for No-Limit Poker},
  author    = {Brown, Noam and Sandholm, Tuomas},
  booktitle = {IJCAI},
  pages     = {5226--5228},
  year      = {2017}
}
|
|
|
|
@Article{ Campbell.Hoane-Jr.Hsu.2002,
  title     = {{Deep Blue}},
  author    = {Campbell, Murray and Hoane, Jr, A Joseph and Hsu,
               Feng-hsiung},
  journal   = {Artificial Intelligence},
  volume    = {134},
  number    = {1--2},
  pages     = {57--83},
  year      = {2002},
  publisher = {Elsevier}
}
|
|
|
|
@InCollection{ Canny.1987,
  title     = {A computational approach to edge detection},
  author    = {Canny, John},
  booktitle = {Readings in Computer Vision},
  pages     = {184--203},
  year      = {1987},
  publisher = {Elsevier}
}
|
|
|
|
@InProceedings{ Cer.Diab.Agirre.ea.2017,
  title     = {{SemEval-2017} Task 1: Semantic Textual Similarity
               Multilingual and Crosslingual Focused Evaluation},
  author    = {Cer, Daniel and Diab, Mona and Agirre, Eneko and
               Lopez-Gazpio, I{\~n}igo and Specia, Lucia},
  booktitle = {Proceedings of the 11th International Workshop on Semantic
               Evaluation ({SemEval-2017})},
  pages     = {1--14},
  year      = {2017}
}
|
|
|
|
@InProceedings{ Cheng.Dong.Lapata.2016,
  title     = {Long Short-Term Memory-Networks for Machine Reading},
  author    = {Cheng, Jianpeng and Dong, Li and Lapata, Mirella},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in
               Natural Language Processing},
  pages     = {551--561},
  year      = {2016}
}
|
|
|
|
@Article{ Cho.Van-Merrienboer.Bahdanau.ea.2014,
  title   = {On the properties of neural machine translation:
             Encoder-decoder approaches},
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau,
             Dzmitry and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1409.1259},
  year    = {2014}
}
|
|
|
|
@Article{ Cho.Van-Merrienboer.Gulcehre.ea.2014,
  title   = {Learning phrase representations using {RNN} encoder-decoder
             for statistical machine translation},
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre,
             Caglar and Bahdanau, Dzmitry and Bougares, Fethi and
             Schwenk, Holger and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014}
}
|
|
|
|
@Book{ Chowdhury.2010,
  title     = {Introduction to modern information retrieval},
  author    = {Chowdhury, Gobinda G},
  year      = {2010},
  publisher = {Facet Publishing}
}
|
|
|
|
@Article{ Chung.Gulcehre.Cho.ea.2014,
  title   = {Empirical evaluation of gated recurrent neural networks on
             sequence modeling},
  author  = {Chung, Junyoung and Gulcehre, Caglar and Cho, KyungHyun
             and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1412.3555},
  year    = {2014}
}
|
|
|
|
@Article{ Collobert.Weston.Bottou.ea.2011,
  title   = {Natural language processing (almost) from scratch},
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on
             and Karlen, Michael and Kavukcuoglu, Koray and Kuksa,
             Pavel},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2493--2537},
  year    = {2011}
}
|
|
|
|
@Article{ Csiszar.2008,
  title     = {Axiomatic characterizations of information measures},
  author    = {Csisz{\'a}r, Imre},
  journal   = {Entropy},
  volume    = {10},
  number    = {3},
  pages     = {261--273},
  year      = {2008},
  publisher = {Molecular Diversity Preservation International}
}
|
|
|
|
@InProceedings{ Dalal.Triggs.2005,
  title        = {Histograms of oriented gradients for human detection},
  author       = {Dalal, Navneet and Triggs, Bill},
  booktitle    = {2005 {IEEE} Computer Society Conference on Computer Vision
                  and Pattern Recognition ({CVPR}'05)},
  volume       = {1},
  pages        = {886--893},
  year         = {2005},
  organization = {IEEE}
}
|
|
|
|
@Article{ De-Cock.2011,
  title     = {{Ames}, {Iowa}: Alternative to the {Boston} housing data as
               an end of semester regression project},
  author    = {De Cock, Dean},
  journal   = {Journal of Statistics Education},
  volume    = {19},
  number    = {3},
  year      = {2011},
  publisher = {Taylor \& Francis}
}
|
|
|
|
@Article{ DeCandia.Hastorun.Jampani.ea.2007,
  title     = {{Dynamo}: {Amazon's} highly available key-value store},
  author    = {DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan
               and Kakulapati, Gunavardhan and Lakshman, Avinash and
               Pilchin, Alex and Sivasubramanian, Swaminathan and
               Vosshall, Peter and Vogels, Werner},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {41},
  number    = {6},
  pages     = {205--220},
  year      = {2007},
  publisher = {ACM}
}
|
|
|
|
@Article{ Devlin.Chang.Lee.ea.2018,
  title   = {{BERT}: Pre-training of deep bidirectional transformers for
             language understanding},
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and
             Toutanova, Kristina},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2018}
}
|
|
|
|
@InProceedings{ Doersch.Gupta.Efros.2015,
  title     = {Unsupervised visual representation learning by context
               prediction},
  author    = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {1422--1430},
  year      = {2015}
}
|
|
|
|
@InProceedings{ Dosovitskiy.Beyer.Kolesnikov.ea.2021,
  title     = {An image is worth 16x16 words: Transformers for image
               recognition at scale},
  author    = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov,
               Alexander and Weissenborn, Dirk and Zhai, Xiaohua and
               Unterthiner, Thomas and Dehghani, Mostafa and Minderer,
               Matthias and Heigold, Georg and Gelly, Sylvain and others},
  booktitle = {International Conference on Learning Representations},
  year      = {2021}
}
|
|
|
|
@InCollection{ Doucet.De-Freitas.Gordon.2001,
  title     = {An introduction to sequential {Monte Carlo} methods},
  author    = {Doucet, Arnaud and De Freitas, Nando and Gordon, Neil},
  booktitle = {Sequential {Monte Carlo} Methods in Practice},
  pages     = {3--14},
  year      = {2001},
  publisher = {Springer}
}
|
|
|
|
@Article{ Duchi.Hazan.Singer.2011,
  title   = {Adaptive subgradient methods for online learning and
             stochastic optimization},
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  month   = jul,
  pages   = {2121--2159},
  year    = {2011}
}
|
|
|
|
@Article{ Dumoulin.Visin.2016,
  title   = {A guide to convolution arithmetic for deep learning},
  author  = {Dumoulin, Vincent and Visin, Francesco},
  journal = {arXiv preprint arXiv:1603.07285},
  year    = {2016}
}
|
|
|
|
@Article{ Edelman.Ostrovsky.Schwarz.2007,
  title   = {Internet advertising and the generalized second-price
             auction: Selling billions of dollars worth of keywords},
  author  = {Edelman, Benjamin and Ostrovsky, Michael and Schwarz,
             Michael},
  journal = {American Economic Review},
  volume  = {97},
  number  = {1},
  pages   = {242--259},
  year    = {2007}
}
|
|
|
|
@InProceedings{ Flammarion.Bach.2015,
  title     = {From averaging to acceleration, there is only a
               step-size},
  author    = {Flammarion, Nicolas and Bach, Francis},
  booktitle = {Conference on Learning Theory},
  pages     = {658--695},
  year      = {2015}
}
|
|
|
|
@InProceedings{ Gatys.Ecker.Bethge.2016,
  title     = {Image style transfer using convolutional neural networks},
  author    = {Gatys, Leon A and Ecker, Alexander S and Bethge,
               Matthias},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {2414--2423},
  year      = {2016}
}
|
|
|
|
@Article{ Ginibre.1965,
  title     = {Statistical ensembles of complex, quaternion, and real
               matrices},
  author    = {Ginibre, Jean},
  journal   = {Journal of Mathematical Physics},
  volume    = {6},
  number    = {3},
  pages     = {440--449},
  year      = {1965},
  publisher = {AIP}
}
|
|
|
|
@InProceedings{ Girshick.2015,
  title     = {Fast {R-CNN}},
  author    = {Girshick, Ross},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {1440--1448},
  year      = {2015}
}
|
|
|
|
@InProceedings{ Girshick.Donahue.Darrell.ea.2014,
  title     = {Rich feature hierarchies for accurate object detection and
               semantic segmentation},
  author    = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and
               Malik, Jitendra},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {580--587},
  year      = {2014}
}
|
|
|
|
@InProceedings{ Glorot.Bengio.2010,
  title     = {Understanding the difficulty of training deep feedforward
               neural networks},
  author    = {Glorot, Xavier and Bengio, Yoshua},
  booktitle = {Proceedings of the Thirteenth International Conference on
               Artificial Intelligence and Statistics},
  pages     = {249--256},
  year      = {2010}
}
|
|
|
|
@Article{ Goh.2017,
  author  = {Goh, Gabriel},
  title   = {Why Momentum Really Works},
  journal = {Distill},
  year    = {2017},
  url     = {http://distill.pub/2017/momentum},
  doi     = {10.23915/distill.00006}
}
|
|
|
|
@Article{ Goldberg.Nichols.Oki.ea.1992,
  title     = {Using collaborative filtering to weave an information
               tapestry},
  author    = {Goldberg, David and Nichols, David and Oki, Brian M and
               Terry, Douglas},
  journal   = {Communications of the ACM},
  volume    = {35},
  number    = {12},
  pages     = {61--71},
  year      = {1992},
  publisher = {Association for Computing Machinery, Inc.}
}
|
|
|
|
@Book{ Goodfellow.Bengio.Courville.2016,
  title     = {Deep Learning},
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  publisher = {MIT Press},
  note      = {\url{http://www.deeplearningbook.org}},
  year      = {2016}
}
|
|
|
|
@InProceedings{ Goodfellow.Pouget-Abadie.Mirza.ea.2014,
  title     = {Generative adversarial nets},
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi
               and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and
               Courville, Aaron and Bengio, Yoshua},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2672--2680},
  year      = {2014}
}
|
|
|
|
@Article{ Gotmare.Keskar.Xiong.ea.2018,
  title   = {A Closer Look at Deep Learning Heuristics: Learning rate
             restarts, Warmup and Distillation},
  author  = {Gotmare, Akhilesh and Keskar, Nitish Shirish and Xiong,
             Caiming and Socher, Richard},
  journal = {arXiv preprint arXiv:1810.13243},
  year    = {2018}
}
|
|
|
|
@Article{ Graves.2013,
  title   = {Generating sequences with recurrent neural networks},
  author  = {Graves, Alex},
  journal = {arXiv preprint arXiv:1308.0850},
  year    = {2013}
}
|
|
|
|
@Article{ Graves.Schmidhuber.2005,
  title     = {Framewise phoneme classification with bidirectional {LSTM}
               and other neural network architectures},
  author    = {Graves, Alex and Schmidhuber, J{\"u}rgen},
  journal   = {Neural Networks},
  volume    = {18},
  number    = {5--6},
  pages     = {602--610},
  year      = {2005},
  publisher = {Elsevier}
}
|
|
|
|
@InCollection{ Gunawardana.Shani.2015,
  title     = {Evaluating recommender systems},
  author    = {Gunawardana, Asela and Shani, Guy},
  booktitle = {Recommender Systems Handbook},
  pages     = {265--308},
  year      = {2015},
  publisher = {Springer}
}
|
|
|
|
@InProceedings{ Guo.Tang.Ye.ea.2017,
  title     = {{DeepFM}: a factorization-machine based neural network for
               {CTR} prediction},
  author    = {Guo, Huifeng and Tang, Ruiming and Ye, Yunming and Li,
               Zhenguo and He, Xiuqiang},
  booktitle = {Proceedings of the 26th International Joint Conference on
               Artificial Intelligence},
  pages     = {1725--1731},
  year      = {2017},
  publisher = {AAAI Press}
}
|
|
|
|
@Article{ Hadjis.Zhang.Mitliagkas.ea.2016,
  title   = {Omnivore: An optimizer for multi-device deep learning on
             {CPUs} and {GPUs}},
  author  = {Hadjis, Stefan and Zhang, Ce and Mitliagkas, Ioannis and
             Iter, Dan and R{\'e}, Christopher},
  journal = {arXiv preprint arXiv:1606.04487},
  year    = {2016}
}
|
|
|
|
@InProceedings{ Hazan.Rakhlin.Bartlett.2008,
  title     = {Adaptive online gradient descent},
  author    = {Hazan, Elad and Rakhlin, Alexander and Bartlett, Peter L},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {65--72},
  year      = {2008}
}
|
|
|
|
@InProceedings{ He.Chua.2017,
  title        = {Neural factorization machines for sparse predictive
                  analytics},
  author       = {He, Xiangnan and Chua, Tat-Seng},
  booktitle    = {Proceedings of the 40th International {ACM} {SIGIR}
                  Conference on Research and Development in Information
                  Retrieval},
  pages        = {355--364},
  year         = {2017},
  organization = {ACM}
}
|
|
|
|
@InProceedings{ He.Gkioxari.Dollar.ea.2017,
  title     = {Mask {R-CNN}},
  author    = {He, Kaiming and Gkioxari, Georgia and Doll{\'a}r, Piotr
               and Girshick, Ross},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {2961--2969},
  year      = {2017}
}
|
|
|
|
@InProceedings{ He.Liao.Zhang.ea.2017,
  title        = {Neural collaborative filtering},
  author       = {He, Xiangnan and Liao, Lizi and Zhang, Hanwang and Nie,
                  Liqiang and Hu, Xia and Chua, Tat-Seng},
  booktitle    = {Proceedings of the 26th International Conference on World
                  Wide Web},
  pages        = {173--182},
  year         = {2017},
  organization = {International World Wide Web Conferences Steering
                  Committee}
}
|
|
|
|
@InProceedings{ He.Zhang.Ren.ea.2015,
  title     = {Delving deep into rectifiers: Surpassing human-level
               performance on {ImageNet} classification},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
               Jian},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {1026--1034},
  year      = {2015}
}
|
|
|
|
@InProceedings{ He.Zhang.Ren.ea.2016,
  title     = {Deep residual learning for image recognition},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
               Jian},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {770--778},
  year      = {2016}
}
|
|
|
|
@InProceedings{ He.Zhang.Ren.ea.2016*1,
  title     = {Identity mappings in deep residual networks},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
               Jian},
  booktitle = {European Conference on Computer Vision},
  pages     = {630--645},
  year      = {2016},
  publisher = {Springer}
}
|
|
|
|
@Book{ Hebb.Hebb.1949,
  title     = {The organization of behavior},
  author    = {Hebb, Donald Olding},
  volume    = {65},
  year      = {1949},
  publisher = {Wiley},
  address   = {New York}
}
|
|
|
|
@Article{ Hendrycks.Gimpel.2016,
  title   = {Gaussian error linear units ({GELUs})},
  author  = {Hendrycks, Dan and Gimpel, Kevin},
  journal = {arXiv preprint arXiv:1606.08415},
  year    = {2016}
}
|
|
|
|
@Book{ Hennessy.Patterson.2011,
  title     = {Computer architecture: a quantitative approach},
  author    = {Hennessy, John L and Patterson, David A},
  year      = {2011},
  publisher = {Elsevier}
}
|
|
|
|
@InProceedings{ Herlocker.Konstan.Borchers.ea.1999,
  title        = {An algorithmic framework for performing collaborative
                  filtering},
  author       = {Herlocker, Jonathan L and Konstan, Joseph A and Borchers,
                  Al and Riedl, John},
  booktitle    = {22nd Annual International ACM SIGIR Conference on Research
                  and Development in Information Retrieval, SIGIR 1999},
  pages        = {230--237},
  year         = {1999},
  organization = {Association for Computing Machinery, Inc}
}
|
|
|
|
@Article{ Hidasi.Karatzoglou.Baltrunas.ea.2015,
  title   = {Session-based recommendations with recurrent neural
             networks},
  author  = {Hidasi, Bal{\'a}zs and Karatzoglou, Alexandros and
             Baltrunas, Linas and Tikk, Domonkos},
  journal = {arXiv preprint arXiv:1511.06939},
  year    = {2015}
}
|
|
|
|
@InCollection{ Hochreiter.Bengio.Frasconi.ea.2001,
  title     = {Gradient flow in recurrent nets: the difficulty of
               learning long-term dependencies},
  author    = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo
               and Schmidhuber, J{\"u}rgen and others},
  booktitle = {A Field Guide to Dynamical Recurrent Neural Networks},
  year      = {2001},
  publisher = {IEEE Press}
}
|
|
|
|
@Article{ Hochreiter.Schmidhuber.1997,
  title     = {Long short-term memory},
  author    = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  journal   = {Neural Computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  year      = {1997},
  publisher = {MIT Press}
}
|
|
|
|
@InProceedings{ Hoyer.Janzing.Mooij.ea.2009,
  title     = {Nonlinear causal discovery with additive noise models},
  author    = {Hoyer, Patrik O and Janzing, Dominik and Mooij, Joris M
               and Peters, Jonas and Sch{\"o}lkopf, Bernhard},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {689--696},
  year      = {2009}
}
|
|
|
|
@InProceedings{ Hu.Koren.Volinsky.2008,
  title        = {Collaborative filtering for implicit feedback datasets},
  author       = {Hu, Yifan and Koren, Yehuda and Volinsky, Chris},
  booktitle    = {2008 Eighth IEEE International Conference on Data Mining},
  pages        = {263--272},
  year         = {2008},
  organization = {IEEE}
}
|
|
|
|
@Article{ Hu.Lee.Aggarwal.ea.2020,
  title   = {Text Style Transfer: A Review and Experimental
             Evaluation},
  author  = {Hu, Zhiqiang and Lee, Roy Ka-Wei and Aggarwal, Charu C and
             Zhang, Aston},
  journal = {arXiv preprint arXiv:2010.12742},
  year    = {2020}
}
|
|
|
|
@InProceedings{ Hu.Shen.Sun.2018,
  title     = {Squeeze-and-excitation networks},
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {7132--7141},
  year      = {2018}
}
|
|
|
|
@InProceedings{ Huang.Liu.Van-Der-Maaten.ea.2017,
  title     = {Densely connected convolutional networks},
  author    = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and
               Weinberger, Kilian Q},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {4700--4708},
  year      = {2017}
}
|
|
|
|
@InProceedings{ Ioffe.2017,
  title     = {Batch renormalization: Towards reducing minibatch
               dependence in batch-normalized models},
  author    = {Ioffe, Sergey},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1945--1953},
  year      = {2017}
}
|
|
|
|
@Article{ Ioffe.Szegedy.2015,
  title   = {Batch normalization: Accelerating deep network training by
             reducing internal covariate shift},
  author  = {Ioffe, Sergey and Szegedy, Christian},
  journal = {arXiv preprint arXiv:1502.03167},
  year    = {2015}
}
|
|
|
|
@Article{ Izmailov.Podoprikhin.Garipov.ea.2018,
  title   = {Averaging weights leads to wider optima and better
             generalization},
  author  = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov,
             Timur and Vetrov, Dmitry and Wilson, Andrew Gordon},
  journal = {arXiv preprint arXiv:1803.05407},
  year    = {2018}
}
|
|
|
|
@Book{ Jaeger.2002,
  title     = {Tutorial on training recurrent neural networks, covering
               {BPPT}, {RTRL}, {EKF} and the ``echo state network''
               approach},
  author    = {Jaeger, Herbert},
  volume    = {5},
  year      = {2002},
  publisher = {GMD-Forschungszentrum Informationstechnik Bonn}
}
|
|
|
|
@Book{ James.2007,
  title     = {The principles of psychology},
  author    = {James, William},
  volume    = {1},
  year      = {2007},
  publisher = {Cosimo, Inc.}
}
|
|
|
|
@Article{ Jia.Song.He.ea.2018,
  title   = {Highly scalable deep learning training system with
             mixed-precision: Training {ImageNet} in four minutes},
  author  = {Jia, Xianyan and Song, Shutao and He, Wei and Wang,
             Yangzihao and Rong, Haidong and Zhou, Feihu and Xie,
             Liqiang and Guo, Zhenyu and Yang, Yuanzhou and Yu, Liwei
             and others},
  journal = {arXiv preprint arXiv:1807.11205},
  year    = {2018}
}
|
|
|
|
@InProceedings{ Jouppi.Young.Patil.ea.2017,
  title        = {In-datacenter performance analysis of a tensor processing
                  unit},
  author       = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and
                  Patterson, David and Agrawal, Gaurav and Bajwa, Raminder
                  and Bates, Sarah and Bhatia, Suresh and Boden, Nan and
                  Borchers, Al and others},
  booktitle    = {2017 ACM/IEEE 44th Annual International Symposium on
                  Computer Architecture (ISCA)},
  pages        = {1--12},
  year         = {2017},
  organization = {IEEE}
}
|
|
|
|
@Article{ Karras.Aila.Laine.ea.2017,
  title   = {Progressive growing of {GANs} for improved quality,
             stability, and variation},
  author  = {Karras, Tero and Aila, Timo and Laine, Samuli and
             Lehtinen, Jaakko},
  journal = {arXiv preprint arXiv:1710.10196},
  year    = {2017}
}
|
|
|
|
@Article{ Kim.2014,
  title   = {Convolutional neural networks for sentence
             classification},
  author  = {Kim, Yoon},
  journal = {arXiv preprint arXiv:1408.5882},
  year    = {2014}
}
|
|
|
|
@Article{ Kingma.Ba.2014,
  title   = {{Adam}: A method for stochastic optimization},
  author  = {Kingma, Diederik P and Ba, Jimmy},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014}
}
|
|
|
|
@Book{ Koller.Friedman.2009,
  title     = {Probabilistic graphical models: principles and
               techniques},
  author    = {Koller, Daphne and Friedman, Nir},
  year      = {2009},
  publisher = {MIT Press}
}
|
|
|
|
@Misc{ Kolter.2008,
  title        = {Linear Algebra Review and Reference},
  author       = {Kolter, Zico},
  howpublished = {Available online},
  year         = {2008}
}
|
|
|
|
@InProceedings{ Koren.2009,
  title        = {Collaborative filtering with temporal dynamics},
  author       = {Koren, Yehuda},
  booktitle    = {Proceedings of the 15th {ACM} {SIGKDD} International
                  Conference on Knowledge Discovery and Data Mining},
  pages        = {447--456},
  year         = {2009},
  organization = {ACM}
}
|
|
|
|
@Article{ Koren.Bell.Volinsky.2009,
  title     = {Matrix factorization techniques for recommender systems},
  author    = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
  journal   = {Computer},
  volume    = {42},
  number    = {8},
  pages     = {30--37},
  year      = {2009},
  publisher = {IEEE}
}
|
|
|
|
@InProceedings{ Krizhevsky.Sutskever.Hinton.2012,
  title     = {{ImageNet} classification with deep convolutional neural
               networks},
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey
               E},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1097--1105},
  year      = {2012}
}
|
|
|
|
@Book{ Kung.1988,
  title     = {{VLSI} array processors},
  author    = {Kung, Sun Yuan},
  publisher = {Prentice Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1988},
  note      = {685 p. Research supported by the Semiconductor Research
               Corp., SDIO, NSF, and US Navy}
}
|
|
|
|
@Article{ LeCun.Bottou.Bengio.ea.1998,
  title     = {Gradient-based learning applied to document recognition},
  author    = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and
               Haffner, Patrick and others},
  journal   = {Proceedings of the IEEE},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  year      = {1998},
  publisher = {IEEE}
}
|
|
|
|
@PhDThesis{ Li.2017,
  title  = {Scaling Distributed Machine Learning with System and
            Algorithm Co-design},
  author = {Li, Mu},
  year   = {2017},
  school = {Carnegie Mellon University}
}
|
|
|
|
@InProceedings{ Li.Andersen.Park.ea.2014,
  title     = {Scaling distributed machine learning with the parameter
               server},
  author    = {Li, Mu and Andersen, David G and Park, Jun Woo and Smola,
               Alexander J and Ahmed, Amr and Josifovski, Vanja and Long,
               James and Shekita, Eugene J and Su, Bor-Yiing},
  booktitle = {11th {USENIX} Symposium on Operating Systems Design
               and Implementation ({OSDI} 14)},
  pages     = {583--598},
  year      = {2014}
}
|
|
|
|
@Article{ Lin.Chen.Yan.2013,
  title   = {Network in network},
  author  = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
  journal = {arXiv preprint arXiv:1312.4400},
  year    = {2013}
}
|
|
|
|
@Article{ Lin.Feng.Santos.ea.2017,
  title   = {A structured self-attentive sentence embedding},
  author  = {Lin, Zhouhan and Feng, Minwei and dos Santos, Cicero
             Nogueira and Yu, Mo and Xiang, Bing and Zhou, Bowen and
             Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1703.03130},
  year    = {2017}
}
|
|
|
|
@InProceedings{ Lin.Goyal.Girshick.ea.2017,
  title     = {Focal loss for dense object detection},
  author    = {Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He,
               Kaiming and Doll{\'a}r, Piotr},
  booktitle = {Proceedings of the {IEEE} International Conference on
               Computer Vision},
  pages     = {2980--2988},
  year      = {2017}
}
|
|
|
|
@Article{ Lin.Lv.Zhu.ea.2010,
  title   = {{ImageNet} classification: fast descriptor coding and
             large-scale {SVM} training},
  author  = {Lin, Yuanqing and Lv, F and Zhu, S and Yang, M and Cour, T
             and Yu, K and Cao, L and Li, Z and Tsai, MH and Zhou, X and
             others},
  journal = {Large scale visual recognition challenge},
  year    = {2010}
}
|
|
|
|
@Article{ Lipton.Steinhardt.2018,
  title   = {Troubling trends in machine learning scholarship},
  author  = {Lipton, Zachary C and Steinhardt, Jacob},
  journal = {arXiv preprint arXiv:1807.03341},
  year    = {2018}
}
|
|
|
|
@InProceedings{ Liu.Anguelov.Erhan.ea.2016,
  title     = {{SSD}: Single shot {MultiBox} detector},
  author    = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and
               Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and
               Berg, Alexander C},
  booktitle = {European Conference on Computer Vision},
  pages     = {21--37},
  year      = {2016},
  publisher = {Springer}
}
|
|
|
|
@Article{ Liu.Ott.Goyal.ea.2019,
  title   = {{RoBERTa}: A robustly optimized {BERT} pretraining approach},
  author  = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei
             and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis,
             Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
  journal = {arXiv preprint arXiv:1907.11692},
  year    = {2019}
}
|
|
|
|
@InProceedings{ Long.Shelhamer.Darrell.2015,
  title     = {Fully convolutional networks for semantic segmentation},
  author    = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and
               Pattern Recognition},
  pages     = {3431--3440},
  year      = {2015}
}
|
|
|
|
@Article{ Loshchilov.Hutter.2016,
  title   = {{SGDR}: Stochastic gradient descent with warm restarts},
  author  = {Loshchilov, Ilya and Hutter, Frank},
  journal = {arXiv preprint arXiv:1608.03983},
  year    = {2016}
}
|
|
|
|
@Article{ Lowe.2004,
  title     = {Distinctive image features from scale-invariant
               keypoints},
  author    = {Lowe, David G},
  journal   = {International Journal of Computer Vision},
  volume    = {60},
  number    = {2},
  pages     = {91--110},
  year      = {2004},
  publisher = {Springer}
}
|
|
|
|
@Article{ Luo.Wang.Shao.ea.2018,
  title        = {Towards understanding regularization in batch
                  normalization},
  author       = {Luo, Ping and Wang, Xinjiang and Shao, Wenqi and Peng,
                  Zhanglin},
  journal      = {arXiv preprint arXiv:1809.00846},
  year         = {2018}
}
|
|
|
|
@InProceedings{ Maas.Daly.Pham.ea.2011,
  title        = {Learning word vectors for sentiment analysis},
  author       = {Maas, Andrew L and Daly, Raymond E and Pham, Peter T and
                  Huang, Dan and Ng, Andrew Y and Potts, Christopher},
  booktitle    = {Proceedings of the 49th annual meeting of the association
                  for computational linguistics: Human language
                  technologies-volume 1},
  pages        = {142--150},
  year         = {2011},
  organization = {Association for Computational Linguistics}
}
|
|
|
|
@InProceedings{ McCann.Bradbury.Xiong.ea.2017,
  title        = {Learned in translation: Contextualized word vectors},
  author       = {McCann, Bryan and Bradbury, James and Xiong, Caiming and
                  Socher, Richard},
  booktitle    = {Advances in Neural Information Processing Systems},
  pages        = {6294--6305},
  year         = {2017}
}
|
|
|
|
@Article{ McCulloch.Pitts.1943,
  title        = {A logical calculus of the ideas immanent in nervous
                  activity},
  author       = {McCulloch, Warren S and Pitts, Walter},
  journal      = {The bulletin of mathematical biophysics},
  volume       = {5},
  number       = {4},
  pages        = {115--133},
  year         = {1943},
  publisher    = {Springer}
}
|
|
|
|
@InProceedings{ McMahan.Holt.Sculley.ea.2013,
  title        = {Ad click prediction: a view from the trenches},
  author       = {McMahan, H Brendan and Holt, Gary and Sculley, David and
                  Young, Michael and Ebner, Dietmar and Grady, Julian and
                  Nie, Lan and Phillips, Todd and Davydov, Eugene and
                  Golovin, Daniel and others},
  booktitle    = {Proceedings of the 19th ACM SIGKDD international
                  conference on Knowledge discovery and data mining},
  pages        = {1222--1230},
  year         = {2013},
  organization = {ACM}
}
|
|
|
|
@Article{ Merity.Xiong.Bradbury.ea.2016,
  title        = {Pointer sentinel mixture models},
  author       = {Merity, Stephen and Xiong, Caiming and Bradbury, James and
                  Socher, Richard},
  journal      = {arXiv preprint arXiv:1609.07843},
  year         = {2016}
}
|
|
|
|
@Article{ Mikolov.Chen.Corrado.ea.2013,
  title        = {Efficient estimation of word representations in vector
                  space},
  author       = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean,
                  Jeffrey},
  journal      = {arXiv preprint arXiv:1301.3781},
  year         = {2013}
}
|
|
|
|
@InProceedings{ Mikolov.Sutskever.Chen.ea.2013,
  title        = {Distributed representations of words and phrases and their
                  compositionality},
  author       = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and
                  Corrado, Greg S and Dean, Jeff},
  booktitle    = {Advances in neural information processing systems},
  pages        = {3111--3119},
  year         = {2013}
}
|
|
|
|
@InProceedings{ Mirhoseini.Pham.Le.ea.2017,
  title        = {Device placement optimization with reinforcement
                  learning},
  author       = {Mirhoseini, Azalia and Pham, Hieu and Le, Quoc V and
                  Steiner, Benoit and Larsen, Rasmus and Zhou, Yuefeng and
                  Kumar, Naveen and Norouzi, Mohammad and Bengio, Samy and
                  Dean, Jeff},
  booktitle    = {Proceedings of the 34th International Conference on
                  Machine Learning-Volume 70},
  pages        = {2430--2439},
  year         = {2017},
  organization = {JMLR.org}
}
|
|
|
|
@InProceedings{ Mnih.Heess.Graves.ea.2014,
  title        = {Recurrent models of visual attention},
  author       = {Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and
                  others},
  booktitle    = {Advances in neural information processing systems},
  pages        = {2204--2212},
  year         = {2014}
}
|
|
|
|
@Article{ Morey.Hoekstra.Rouder.ea.2016,
  title        = {The fallacy of placing confidence in confidence
                  intervals},
  author       = {Morey, Richard D and Hoekstra, Rink and Rouder, Jeffrey N
                  and Lee, Michael D and Wagenmakers, Eric-Jan},
  journal      = {Psychonomic bulletin \& review},
  volume       = {23},
  number       = {1},
  pages        = {103--123},
  year         = {2016},
  publisher    = {Springer}
}
|
|
|
|
@Article{ Nadaraya.1964,
  title        = {On estimating regression},
  author       = {Nadaraya, Elizbar A},
  journal      = {Theory of Probability \& Its Applications},
  volume       = {9},
  number       = {1},
  pages        = {141--142},
  year         = {1964},
  publisher    = {SIAM}
}
|
|
|
|
@Book{ Nesterov.2018,
  title        = {Lectures on convex optimization},
  author       = {Nesterov, Yurii},
  volume       = {137},
  year         = {2018},
  publisher    = {Springer}
}
|
|
|
|
@Misc{ Nesterov.Vial.2000,
  title        = {Confidence level solutions for stochastic programming},
  author       = {Nesterov, Yu and Vial, J-Ph},
  howpublished = {Stochastic Programming E-Print Series},
  year         = {2000}
}
|
|
|
|
@Article{ Neyman.1937,
  title        = {Outline of a theory of statistical estimation based on the
                  classical theory of probability},
  author       = {Neyman, Jerzy},
  journal      = {Philosophical Transactions of the Royal Society of London.
                  Series A, Mathematical and Physical Sciences},
  volume       = {236},
  number       = {767},
  pages        = {333--380},
  year         = {1937},
  publisher    = {The Royal Society London}
}
|
|
|
|
@InProceedings{ Papineni.Roukos.Ward.ea.2002,
  title        = {{BLEU}: a method for automatic evaluation of machine
                  translation},
  author       = {Papineni, Kishore and Roukos, Salim and Ward, Todd and
                  Zhu, Wei-Jing},
  booktitle    = {Proceedings of the 40th annual meeting of the Association
                  for Computational Linguistics},
  pages        = {311--318},
  year         = {2002}
}
|
|
|
|
@Article{ Parikh.Tackstrom.Das.ea.2016,
  title        = {A decomposable attention model for natural language
                  inference},
  author       = {Parikh, Ankur P and T{\"a}ckstr{\"o}m, Oscar and Das,
                  Dipanjan and Uszkoreit, Jakob},
  journal      = {arXiv preprint arXiv:1606.01933},
  year         = {2016}
}
|
|
|
|
@InProceedings{ Park.Liu.Wang.ea.2019,
  title        = {Semantic image synthesis with spatially-adaptive
                  normalization},
  author       = {Park, Taesung and Liu, Ming-Yu and Wang, Ting-Chun and
                  Zhu, Jun-Yan},
  booktitle    = {Proceedings of the IEEE Conference on Computer Vision and
                  Pattern Recognition},
  pages        = {2337--2346},
  year         = {2019}
}
|
|
|
|
@Article{ Paulus.Xiong.Socher.2017,
  title        = {A deep reinforced model for abstractive summarization},
  author       = {Paulus, Romain and Xiong, Caiming and Socher, Richard},
  journal      = {arXiv preprint arXiv:1705.04304},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Pennington.Schoenholz.Ganguli.2017,
  title        = {Resurrecting the sigmoid in deep learning through
                  dynamical isometry: theory and practice},
  author       = {Pennington, Jeffrey and Schoenholz, Samuel and Ganguli,
                  Surya},
  booktitle    = {Advances in neural information processing systems},
  pages        = {4785--4795},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Pennington.Socher.Manning.2014,
  title        = {{GloVe}: Global vectors for word representation},
  author       = {Pennington, Jeffrey and Socher, Richard and Manning,
                  Christopher},
  booktitle    = {Proceedings of the 2014 conference on empirical methods in
                  natural language processing (EMNLP)},
  pages        = {1532--1543},
  year         = {2014}
}
|
|
|
|
@InProceedings{ Peters.Ammar.Bhagavatula.ea.2017,
  title        = {Semi-supervised sequence tagging with bidirectional
                  language models},
  author       = {Peters, Matthew and Ammar, Waleed and Bhagavatula, Chandra
                  and Power, Russell},
  booktitle    = {Proceedings of the 55th Annual Meeting of the Association
                  for Computational Linguistics (Volume 1: Long Papers)},
  pages        = {1756--1765},
  year         = {2017}
}
|
|
|
|
@Book{ Peters.Janzing.Scholkopf.2017,
  title        = {Elements of causal inference: foundations and learning
                  algorithms},
  author       = {Peters, Jonas and Janzing, Dominik and Sch{\"o}lkopf,
                  Bernhard},
  year         = {2017},
  publisher    = {MIT Press}
}
|
|
|
|
@InProceedings{ Peters.Neumann.Iyyer.ea.2018,
  title        = {Deep Contextualized Word Representations},
  author       = {Peters, Matthew and Neumann, Mark and Iyyer, Mohit and
                  Gardner, Matt and Clark, Christopher and Lee, Kenton and
                  Zettlemoyer, Luke},
  booktitle    = {Proceedings of the 2018 Conference of the North American
                  Chapter of the Association for Computational Linguistics:
                  Human Language Technologies, Volume 1 (Long Papers)},
  pages        = {2227--2237},
  year         = {2018}
}
|
|
|
|
@Article{ Petersen.Pedersen.ea.2008,
  title        = {The matrix cookbook},
  author       = {Petersen, Kaare Brandt and Pedersen, Michael Syskind and
                  others},
  journal      = {Technical University of Denmark},
  volume       = {7},
  number       = {15},
  pages        = {510},
  year         = {2008}
}
|
|
|
|
@Article{ Polyak.1964,
  title        = {Some methods of speeding up the convergence of iteration
                  methods},
  author       = {Polyak, Boris T},
  journal      = {USSR Computational Mathematics and Mathematical Physics},
  volume       = {4},
  number       = {5},
  pages        = {1--17},
  year         = {1964},
  publisher    = {Elsevier}
}
|
|
|
|
@Article{ Quadrana.Cremonesi.Jannach.2018,
  title        = {Sequence-aware recommender systems},
  author       = {Quadrana, Massimo and Cremonesi, Paolo and Jannach,
                  Dietmar},
  journal      = {ACM Computing Surveys (CSUR)},
  volume       = {51},
  number       = {4},
  pages        = {66},
  year         = {2018},
  publisher    = {ACM}
}
|
|
|
|
@Article{ Radford.Metz.Chintala.2015,
  title        = {Unsupervised representation learning with deep
                  convolutional generative adversarial networks},
  author       = {Radford, Alec and Metz, Luke and Chintala, Soumith},
  journal      = {arXiv preprint arXiv:1511.06434},
  year         = {2015}
}
|
|
|
|
@Article{ Radford.Narasimhan.Salimans.ea.2018,
  title        = {Improving language understanding by generative
                  pre-training},
  author       = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim
                  and Sutskever, Ilya},
  journal      = {OpenAI},
  year         = {2018}
}
|
|
|
|
@Article{ Radford.Wu.Child.ea.2019,
  title        = {Language models are unsupervised multitask learners},
  author       = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan,
                  David and Amodei, Dario and Sutskever, Ilya},
  journal      = {OpenAI Blog},
  volume       = {1},
  number       = {8},
  pages        = {9},
  year         = {2019}
}
|
|
|
|
@Article{ Rajpurkar.Zhang.Lopyrev.ea.2016,
  title        = {{SQuAD}: 100,000+ questions for machine comprehension of
                  text},
  author       = {Rajpurkar, Pranav and Zhang, Jian and Lopyrev, Konstantin
                  and Liang, Percy},
  journal      = {arXiv preprint arXiv:1606.05250},
  year         = {2016}
}
|
|
|
|
@Article{ Reddi.Kale.Kumar.2019,
  title        = {On the convergence of {Adam} and beyond},
  author       = {Reddi, Sashank J and Kale, Satyen and Kumar, Sanjiv},
  journal      = {arXiv preprint arXiv:1904.09237},
  year         = {2019}
}
|
|
|
|
@InProceedings{ Redmon.Divvala.Girshick.ea.2016,
  title        = {You only look once: Unified, real-time object detection},
  author       = {Redmon, Joseph and Divvala, Santosh and Girshick, Ross and
                  Farhadi, Ali},
  booktitle    = {Proceedings of the IEEE conference on computer vision and
                  pattern recognition},
  pages        = {779--788},
  year         = {2016}
}
|
|
|
|
@Article{ Reed.De-Freitas.2015,
  title        = {Neural programmer-interpreters},
  author       = {Reed, Scott and De Freitas, Nando},
  journal      = {arXiv preprint arXiv:1511.06279},
  year         = {2015}
}
|
|
|
|
@InProceedings{ Ren.He.Girshick.ea.2015,
  title        = {Faster {R-CNN}: Towards real-time object detection with
                  region proposal networks},
  author       = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun,
                  Jian},
  booktitle    = {Advances in neural information processing systems},
  pages        = {91--99},
  year         = {2015}
}
|
|
|
|
@InProceedings{ Rendle.2010,
  title        = {Factorization machines},
  author       = {Rendle, Steffen},
  booktitle    = {2010 IEEE International Conference on Data Mining},
  pages        = {995--1000},
  year         = {2010},
  organization = {IEEE}
}
|
|
|
|
@InProceedings{ Rendle.Freudenthaler.Gantner.ea.2009,
  title        = {{BPR}: {Bayesian} personalized ranking from implicit
                  feedback},
  author       = {Rendle, Steffen and Freudenthaler, Christoph and Gantner,
                  Zeno and Schmidt-Thieme, Lars},
  booktitle    = {Proceedings of the twenty-fifth conference on uncertainty
                  in artificial intelligence},
  pages        = {452--461},
  year         = {2009},
  organization = {AUAI Press}
}
|
|
|
|
@Article{ Rumelhart.Hinton.Williams.ea.1988,
  title        = {Learning representations by back-propagating errors},
  author       = {Rumelhart, David E and Hinton, Geoffrey E and Williams,
                  Ronald J and others},
  journal      = {Cognitive modeling},
  volume       = {5},
  number       = {3},
  pages        = {1},
  year         = {1988}
}
|
|
|
|
@Book{ Russell.Norvig.2016,
  title        = {Artificial intelligence: a modern approach},
  author       = {Russell, Stuart J and Norvig, Peter},
  year         = {2016},
  publisher    = {Pearson Education Limited},
  address      = {Malaysia}
}
|
|
|
|
@Article{ Salton.Wong.Yang.1975,
  title        = {A vector space model for automatic indexing},
  author       = {Salton, Gerard and Wong, Anita and Yang, Chung-Shu},
  journal      = {Communications of the ACM},
  volume       = {18},
  number       = {11},
  pages        = {613--620},
  year         = {1975},
  publisher    = {ACM}
}
|
|
|
|
@InProceedings{ Santurkar.Tsipras.Ilyas.ea.2018,
  title        = {How does batch normalization help optimization?},
  author       = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew
                  and Madry, Aleksander},
  booktitle    = {Advances in Neural Information Processing Systems},
  pages        = {2483--2493},
  year         = {2018}
}
|
|
|
|
@Article{ Sarwar.Karypis.Konstan.ea.2001,
  title        = {Item-based collaborative filtering recommendation
                  algorithms},
  author       = {Sarwar, Badrul Munir and Karypis, George and Konstan,
                  Joseph A and Riedl, John and others},
  journal      = {WWW},
  volume       = {1},
  pages        = {285--295},
  year         = {2001}
}
|
|
|
|
@InProceedings{ Schein.Popescul.Ungar.ea.2002,
  title        = {Methods and metrics for cold-start recommendations},
  author       = {Schein, Andrew I and Popescul, Alexandrin and Ungar, Lyle
                  H and Pennock, David M},
  booktitle    = {Proceedings of the 25th annual international ACM SIGIR
                  conference on Research and development in information
                  retrieval},
  pages        = {253--260},
  year         = {2002},
  organization = {ACM}
}
|
|
|
|
@Article{ Schuster.Paliwal.1997,
  title        = {Bidirectional recurrent neural networks},
  author       = {Schuster, Mike and Paliwal, Kuldip K},
  journal      = {IEEE Transactions on Signal Processing},
  volume       = {45},
  number       = {11},
  pages        = {2673--2681},
  year         = {1997},
  publisher    = {IEEE}
}
|
|
|
|
@InProceedings{ Sedhain.Menon.Sanner.ea.2015,
  title        = {{AutoRec}: Autoencoders meet collaborative filtering},
  author       = {Sedhain, Suvash and Menon, Aditya Krishna and Sanner,
                  Scott and Xie, Lexing},
  booktitle    = {Proceedings of the 24th International Conference on World
                  Wide Web},
  pages        = {111--112},
  year         = {2015},
  organization = {ACM}
}
|
|
|
|
@Article{ Sennrich.Haddow.Birch.2015,
  title        = {Neural machine translation of rare words with subword
                  units},
  author       = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  journal      = {arXiv preprint arXiv:1508.07909},
  year         = {2015}
}
|
|
|
|
@Article{ Sergeev.Del-Balso.2018,
  title        = {{Horovod}: fast and easy distributed deep learning in
                  {TensorFlow}},
  author       = {Sergeev, Alexander and Del Balso, Mike},
  journal      = {arXiv preprint arXiv:1802.05799},
  year         = {2018}
}
|
|
|
|
@Article{ Shannon.1948,
  author       = {Shannon, Claude Elwood},
  journal      = {The Bell System Technical Journal},
  month        = jul,
  number       = {3},
  pages        = {379--423},
  publisher    = {Nokia Bell Labs},
  title        = {A Mathematical Theory of Communication},
  volume       = {27},
  year         = {1948}
}
|
|
|
|
@InProceedings{ Shao.Yao.Sun.ea.2020,
  title        = {{ControlVAE}: Controllable Variational Autoencoder},
  author       = {Shao, Huajie and Yao, Shuochao and Sun, Dachun and Zhang,
                  Aston and Liu, Shengzhong and Liu, Dongxin and Wang, Jun
                  and Abdelzaher, Tarek},
  booktitle    = {Proceedings of the 37th International Conference on
                  Machine Learning},
  year         = {2020},
  organization = {JMLR.org}
}
|
|
|
|
@Article{ Silver.Huang.Maddison.ea.2016,
  title        = {Mastering the game of {Go} with deep neural networks and
                  tree search},
  author       = {Silver, David and Huang, Aja and Maddison, Chris J and
                  Guez, Arthur and Sifre, Laurent and Van Den Driessche,
                  George and Schrittwieser, Julian and Antonoglou, Ioannis
                  and Panneershelvam, Veda and Lanctot, Marc and others},
  journal      = {Nature},
  volume       = {529},
  number       = {7587},
  pages        = {484--489},
  year         = {2016},
  publisher    = {Nature Publishing Group}
}
|
|
|
|
@Article{ Simonyan.Zisserman.2014,
  title        = {Very deep convolutional networks for large-scale image
                  recognition},
  author       = {Simonyan, Karen and Zisserman, Andrew},
  journal      = {arXiv preprint arXiv:1409.1556},
  year         = {2014}
}
|
|
|
|
@Article{ Smola.Narayanamurthy.2010,
  title        = {An architecture for parallel topic models},
  author       = {Smola, Alexander and Narayanamurthy, Shravan},
  journal      = {Proceedings of the VLDB Endowment},
  volume       = {3},
  number       = {1--2},
  pages        = {703--710},
  year         = {2010},
  publisher    = {VLDB Endowment}
}
|
|
|
|
@Article{ Srivastava.Hinton.Krizhevsky.ea.2014,
  title        = {Dropout: a simple way to prevent neural networks from
                  overfitting},
  author       = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky,
                  Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  journal      = {The Journal of Machine Learning Research},
  volume       = {15},
  number       = {1},
  pages        = {1929--1958},
  year         = {2014},
  publisher    = {JMLR.org}
}
|
|
|
|
@Book{ Strang.1993,
  title        = {Introduction to linear algebra},
  author       = {Strang, Gilbert},
  volume       = {3},
  year         = {1993},
  publisher    = {Wellesley-Cambridge Press},
  address      = {Wellesley, MA}
}
|
|
|
|
@Article{ Su.Khoshgoftaar.2009,
  title        = {A survey of collaborative filtering techniques},
  author       = {Su, Xiaoyuan and Khoshgoftaar, Taghi M},
  journal      = {Advances in artificial intelligence},
  volume       = {2009},
  year         = {2009},
  publisher    = {Hindawi}
}
|
|
|
|
@InProceedings{ Sukhbaatar.Weston.Fergus.ea.2015,
  title        = {End-to-end memory networks},
  author       = {Sukhbaatar, Sainbayar and Weston, Jason and Fergus, Rob
                  and others},
  booktitle    = {Advances in neural information processing systems},
  pages        = {2440--2448},
  year         = {2015}
}
|
|
|
|
@InProceedings{ Sutskever.Martens.Dahl.ea.2013,
  title        = {On the importance of initialization and momentum in deep
                  learning},
  author       = {Sutskever, Ilya and Martens, James and Dahl, George and
                  Hinton, Geoffrey},
  booktitle    = {International conference on machine learning},
  pages        = {1139--1147},
  year         = {2013}
}
|
|
|
|
@InProceedings{ Sutskever.Vinyals.Le.2014,
  title        = {Sequence to sequence learning with neural networks},
  author       = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
  booktitle    = {Advances in neural information processing systems},
  pages        = {3104--3112},
  year         = {2014}
}
|
|
|
|
@InProceedings{ Szegedy.Ioffe.Vanhoucke.ea.2017,
  title        = {{Inception-v4}, {Inception-ResNet} and the impact of
                  residual connections on learning},
  author       = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke,
                  Vincent and Alemi, Alexander A},
  booktitle    = {Thirty-First AAAI Conference on Artificial Intelligence},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Szegedy.Liu.Jia.ea.2015,
  title        = {Going deeper with convolutions},
  author       = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and
                  Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and
                  Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich,
                  Andrew},
  booktitle    = {Proceedings of the IEEE conference on computer vision and
                  pattern recognition},
  pages        = {1--9},
  year         = {2015}
}
|
|
|
|
@InProceedings{ Szegedy.Vanhoucke.Ioffe.ea.2016,
  title        = {Rethinking the {Inception} architecture for computer
                  vision},
  author       = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe,
                  Sergey and Shlens, Jon and Wojna, Zbigniew},
  booktitle    = {Proceedings of the IEEE conference on computer vision and
                  pattern recognition},
  pages        = {2818--2826},
  year         = {2016}
}
|
|
|
|
@Article{ Tallec.Ollivier.2017,
  title        = {Unbiasing truncated backpropagation through time},
  author       = {Tallec, Corentin and Ollivier, Yann},
  journal      = {arXiv preprint arXiv:1705.08209},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Tang.Wang.2018,
  title        = {Personalized {Top-N} sequential recommendation via
                  convolutional sequence embedding},
  author       = {Tang, Jiaxi and Wang, Ke},
  booktitle    = {Proceedings of the Eleventh ACM International Conference
                  on Web Search and Data Mining},
  pages        = {565--573},
  year         = {2018},
  organization = {ACM}
}
|
|
|
|
@Article{ Tay.Dehghani.Bahri.ea.2020,
  title        = {Efficient transformers: A survey},
  author       = {Tay, Yi and Dehghani, Mostafa and Bahri, Dara and Metzler,
                  Donald},
  journal      = {arXiv preprint arXiv:2009.06732},
  year         = {2020}
}
|
|
|
|
@Article{ Teye.Azizpour.Smith.2018,
  title        = {{Bayesian} uncertainty estimation for batch normalized deep
                  networks},
  author       = {Teye, Mattias and Azizpour, Hossein and Smith, Kevin},
  journal      = {arXiv preprint arXiv:1802.06455},
  year         = {2018}
}
|
|
|
|
@Article{ Tieleman.Hinton.2012,
  title        = {Lecture 6.5-rmsprop: Divide the gradient by a running
                  average of its recent magnitude},
  author       = {Tieleman, Tijmen and Hinton, Geoffrey},
  journal      = {COURSERA: Neural networks for machine learning},
  volume       = {4},
  number       = {2},
  pages        = {26--31},
  year         = {2012}
}
|
|
|
|
@Article{ Toscher.Jahrer.Bell.2009,
  title        = {The {BigChaos} solution to the {Netflix} grand prize},
  author       = {T{\"o}scher, Andreas and Jahrer, Michael and Bell, Robert
                  M},
  journal      = {Netflix prize documentation},
  pages        = {1--52},
  year         = {2009}
}
|
|
|
|
@Article{ Treisman.Gelade.1980,
  title        = {A feature-integration theory of attention},
  author       = {Treisman, Anne M and Gelade, Garry},
  journal      = {Cognitive psychology},
  volume       = {12},
  number       = {1},
  pages        = {97--136},
  year         = {1980},
  publisher    = {Elsevier}
}
|
|
|
|
@Article{ Turing.1950,
  title        = {Computing machinery and intelligence},
  author       = {Turing, Alan},
  journal      = {Mind},
  volume       = {59},
  number       = {236},
  pages        = {433--460},
  year         = {1950}
}
|
|
|
|
@Article{ Uijlings.Van-De-Sande.Gevers.ea.2013,
  title        = {Selective search for object recognition},
  author       = {Uijlings, Jasper RR and Van De Sande, Koen EA and Gevers,
                  Theo and Smeulders, Arnold WM},
  journal      = {International journal of computer vision},
  volume       = {104},
  number       = {2},
  pages        = {154--171},
  year         = {2013},
  publisher    = {Springer}
}
|
|
|
|
@Book{ Van-Loan.Golub.1983,
  title        = {Matrix computations},
  author       = {Van Loan, Charles F and Golub, Gene H},
  year         = {1983},
  publisher    = {Johns Hopkins University Press}
}
|
|
|
|
@InProceedings{ Vaswani.Shazeer.Parmar.ea.2017,
  title        = {Attention is all you need},
  author       = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
                  Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and
                  Kaiser, {\L}ukasz and Polosukhin, Illia},
  booktitle    = {Advances in neural information processing systems},
  pages        = {5998--6008},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Wang.Davidson.Pan.ea.2016,
  title        = {{Gunrock}: A high-performance graph processing library on
                  the {GPU}},
  author       = {Wang, Yangzihao and Davidson, Andrew and Pan, Yuechao and
                  Wu, Yuduo and Riffel, Andy and Owens, John D},
  booktitle    = {ACM SIGPLAN Notices},
  volume       = {51},
  number       = {8},
  pages        = {11},
  year         = {2016},
  organization = {ACM}
}
|
|
|
|
@Article{ Wang.Li.Liberty.ea.2018,
  title        = {Optimal Message Scheduling for Aggregation},
  author       = {Wang, Leyuan and Li, Mu and Liberty, Edo and Smola, Alex
                  J},
  journal      = {NETWORKS},
  volume       = {2},
  number       = {3},
  pages        = {2--3},
  year         = {2018}
}
|
|
|
|
@Article{ Warstadt.Singh.Bowman.2019,
  title        = {Neural network acceptability judgments},
  author       = {Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R},
  journal      = {Transactions of the Association for Computational
                  Linguistics},
  volume       = {7},
  pages        = {625--641},
  year         = {2019},
  publisher    = {MIT Press}
}
|
|
|
|
@Book{ Wasserman.2013,
  title        = {All of statistics: a concise course in statistical
                  inference},
  author       = {Wasserman, Larry},
  year         = {2013},
  publisher    = {Springer Science \& Business Media}
}
|
|
|
|
@Article{ Watkins.Dayan.1992,
  title        = {Q-learning},
  author       = {Watkins, Christopher JCH and Dayan, Peter},
  journal      = {Machine learning},
  volume       = {8},
  number       = {3--4},
  pages        = {279--292},
  year         = {1992},
  publisher    = {Springer}
}
|
|
|
|
@Article{ Watson.1964,
  title        = {Smooth regression analysis},
  author       = {Watson, Geoffrey S},
  journal      = {Sankhy{\=a}: The Indian Journal of Statistics, Series A},
  pages        = {359--372},
  year         = {1964},
  publisher    = {JSTOR}
}
|
|
|
|
@InProceedings{ Welling.Teh.2011,
  title        = {{Bayesian} learning via stochastic gradient {Langevin}
                  dynamics},
  author       = {Welling, Max and Teh, Yee W},
  booktitle    = {Proceedings of the 28th international conference on
                  machine learning (ICML-11)},
  pages        = {681--688},
  year         = {2011}
}
|
|
|
|
@Article{ Werbos.1990,
  title        = {Backpropagation through time: what it does and how to do
                  it},
  author       = {Werbos, Paul J},
  journal      = {Proceedings of the IEEE},
  volume       = {78},
  number       = {10},
  pages        = {1550--1560},
  year         = {1990},
  publisher    = {IEEE}
}
|
|
|
|
@Article{ Wigner.1958,
  title        = {On the distribution of the roots of certain symmetric
                  matrices},
  author       = {Wigner, Eugene P.},
  journal      = {Annals of Mathematics},
  volume       = {67},
  number       = {2},
  pages        = {325--327},
  year         = {1958}
}
|
|
|
|
@TechReport{ Williams.Waterman.Patterson.2009,
  title        = {Roofline: An insightful visual performance model for
                  floating-point programs and multicore architectures},
  author       = {Williams, Samuel and Waterman, Andrew and Patterson,
                  David},
  year         = {2009},
  institution  = {Lawrence Berkeley National Laboratory (LBNL)},
  address      = {Berkeley, CA, United States}
}
|
|
|
|
@Article{ Wood.Gasthaus.Archambeau.ea.2011,
  title        = {The sequence memoizer},
  author       = {Wood, Frank and Gasthaus, Jan and Archambeau, C{\'e}dric
                  and James, Lancelot and Teh, Yee Whye},
  journal      = {Communications of the ACM},
  volume       = {54},
  number       = {2},
  pages        = {91--98},
  year         = {2011},
  publisher    = {ACM}
}
|
|
|
|
@InProceedings{ Wu.Ahmed.Beutel.ea.2017,
  title        = {Recurrent recommender networks},
  author       = {Wu, Chao-Yuan and Ahmed, Amr and Beutel, Alex and Smola,
                  Alexander J and Jing, How},
  booktitle    = {Proceedings of the tenth ACM international conference on
                  web search and data mining},
  pages        = {495--503},
  year         = {2017},
  organization = {ACM}
}
|
|
|
|
@Article{ Wu.Schuster.Chen.ea.2016,
  title        = {Google's neural machine translation system: Bridging the
                  gap between human and machine translation},
  author       = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le,
                  Quoc V and Norouzi, Mohammad and Macherey, Wolfgang and
                  Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey,
                  Klaus and others},
  journal      = {arXiv preprint arXiv:1609.08144},
  year         = {2016}
}
|
|
|
|
@InProceedings{ Xiao.Bahri.Sohl-Dickstein.ea.2018,
  title        = {Dynamical Isometry and a Mean Field Theory of {CNNs}: How
                  to Train 10,000-Layer Vanilla Convolutional Neural
                  Networks},
  author       = {Xiao, Lechao and Bahri, Yasaman and Sohl-Dickstein, Jascha
                  and Schoenholz, Samuel and Pennington, Jeffrey},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5393--5402},
  year         = {2018}
}
|
|
|
|
@Article{ Xiao.Rasul.Vollgraf.2017,
  title        = {{Fashion-MNIST}: a novel image dataset for benchmarking
                  machine learning algorithms},
  author       = {Xiao, Han and Rasul, Kashif and Vollgraf, Roland},
  journal      = {arXiv preprint arXiv:1708.07747},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Xiong.Wu.Alleva.ea.2018,
  title        = {The {Microsoft} 2017 conversational speech recognition
                  system},
  author       = {Xiong, Wayne and Wu, Lingfeng and Alleva, Fil and Droppo,
                  Jasha and Huang, Xuedong and Stolcke, Andreas},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech
                  and Signal Processing (ICASSP)},
  pages        = {5934--5938},
  year         = {2018},
  organization = {IEEE}
}
|
|
|
|
@InProceedings{ Ye.Yin.Lee.ea.2011,
  title        = {Exploiting geographical influence for collaborative
                  point-of-interest recommendation},
  author       = {Ye, Mao and Yin, Peifeng and Lee, Wang-Chien and Lee,
                  Dik-Lun},
  booktitle    = {Proceedings of the 34th international ACM SIGIR conference
                  on Research and development in Information Retrieval},
  pages        = {325--334},
  year         = {2011},
  organization = {ACM}
}
|
|
|
|
@Article{ You.Gitman.Ginsburg.2017,
  title        = {Large batch training of convolutional networks},
  author       = {You, Yang and Gitman, Igor and Ginsburg, Boris},
  journal      = {arXiv preprint arXiv:1708.03888},
  year         = {2017}
}
|
|
|
|
@InProceedings{ Zaheer.Reddi.Sachan.ea.2018,
  title        = {Adaptive methods for nonconvex optimization},
  author       = {Zaheer, Manzil and Reddi, Sashank and Sachan, Devendra and
                  Kale, Satyen and Kumar, Sanjiv},
  booktitle    = {Advances in Neural Information Processing Systems},
  pages        = {9793--9803},
  year         = {2018}
}
|
|
|
|
@Article{ Zeiler.2012,
  title        = {{ADADELTA}: an adaptive learning rate method},
  author       = {Zeiler, Matthew D},
  journal      = {arXiv preprint arXiv:1212.5701},
  year         = {2012}
}
|
|
|
|
@InProceedings{ Zhang.Tay.Zhang.ea.2021,
  title        = {Beyond Fully-Connected Layers with Quaternions:
                  Parameterization of Hypercomplex Multiplications with 1/n
                  Parameters},
  author       = {Zhang, Aston and Tay, Yi and Zhang, Shuai and Chan, Alvin
                  and Luu, Anh Tuan and Hui, Siu Cheung and Fu, Jie},
  booktitle    = {International Conference on Learning Representations},
  year         = {2021}
}
|
|
|
|
@Article{ Zhang.Yao.Sun.ea.2019,
  title        = {Deep learning based recommender system: A survey and new
                  perspectives},
  author       = {Zhang, Shuai and Yao, Lina and Sun, Aixin and Tay, Yi},
  journal      = {ACM Computing Surveys (CSUR)},
  volume       = {52},
  number       = {1},
  pages        = {5},
  year         = {2019},
  publisher    = {ACM}
}
|
|
|
|
@Article{ Zhao.Zheng.Xu.ea.2019,
  title        = {Object detection with deep learning: A review},
  author       = {Zhao, Zhong-Qiu and Zheng, Peng and Xu, Shou-tao and Wu,
                  Xindong},
  journal      = {IEEE transactions on neural networks and learning
                  systems},
  volume       = {30},
  number       = {11},
  pages        = {3212--3232},
  year         = {2019},
  publisher    = {IEEE}
}
|
|
|
|
@InProceedings{ Zhu.Kiros.Zemel.ea.2015,
  title        = {Aligning books and movies: Towards story-like visual
                  explanations by watching movies and reading books},
  author       = {Zhu, Yukun and Kiros, Ryan and Zemel, Rich and
                  Salakhutdinov, Ruslan and Urtasun, Raquel and Torralba,
                  Antonio and Fidler, Sanja},
  booktitle    = {Proceedings of the IEEE international conference on
                  computer vision},
  pages        = {19--27},
  year         = {2015}
}
|
|
|
|
@InProceedings{ Zhu.Park.Isola.ea.2017,
  title        = {Unpaired image-to-image translation using cycle-consistent
                  adversarial networks},
  author       = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and
                  Efros, Alexei A},
  booktitle    = {Proceedings of the IEEE international conference on
                  computer vision},
  pages        = {2223--2232},
  year         = {2017}
}