all_layers.hpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14. // Third party copyrights are property of their respective owners.
  15. //
  16. // Redistribution and use in source and binary forms, with or without modification,
  17. // are permitted provided that the following conditions are met:
  18. //
  19. // * Redistribution's of source code must retain the above copyright notice,
  20. // this list of conditions and the following disclaimer.
  21. //
  22. // * Redistribution's in binary form must reproduce the above copyright notice,
  23. // this list of conditions and the following disclaimer in the documentation
  24. // and/or other materials provided with the distribution.
  25. //
  26. // * The name of the copyright holders may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // This software is provided by the copyright holders and contributors "as is" and
  30. // any express or implied warranties, including, but not limited to, the implied
  31. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32. // In no event shall the Intel Corporation or contributors be liable for any direct,
  33. // indirect, incidental, special, exemplary, or consequential damages
  34. // (including, but not limited to, procurement of substitute goods or services;
  35. // loss of use, data, or profits; or business interruption) however caused
  36. // and on any theory of liability, whether in contract, strict liability,
  37. // or tort (including negligence or otherwise) arising in any way out of
  38. // the use of this software, even if advised of the possibility of such damage.
  39. //
  40. //M*/
  41. #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
  42. #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
  43. #include <opencv2/dnn.hpp>
  44. namespace cv {
  45. namespace dnn {
  46. CV__DNN_INLINE_NS_BEGIN
  47. //! @addtogroup dnn
  48. //! @{
  49. /** @defgroup dnnLayerList Partial List of Implemented Layers
  50. @{
  51. This subsection of dnn module contains information about built-in layers and their descriptions.
  52. The classes listed here provide a C++ API for creating instances of built-in layers.
  53. In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory), which allows creating layers dynamically (by name) and registering new ones.
  54. You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
  55. Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
  56. In particular, the following layers and Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
  57. - Convolution
  58. - Deconvolution
  59. - Pooling
  60. - InnerProduct
  61. - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
  62. - Softmax
  63. - Reshape, Flatten, Slice, Split
  64. - LRN
  65. - MVN
  66. - Dropout (since it does nothing on forward pass -))
  67. */
  68. class CV_EXPORTS BlankLayer : public Layer
  69. {
  70. public:
  71. static Ptr<Layer> create(const LayerParams &params);
  72. };
  73. /**
  74. * Constant layer produces the same data blob at an every forward pass.
  75. */
  76. class CV_EXPORTS ConstLayer : public Layer
  77. {
  78. public:
  79. static Ptr<Layer> create(const LayerParams &params);
  80. };
  81. //! LSTM recurrent layer
  82. class CV_EXPORTS LSTMLayer : public Layer
  83. {
  84. public:
  85. /** Creates instance of LSTM layer */
  86. static Ptr<LSTMLayer> create(const LayerParams& params);
  87. /** @deprecated Use LayerParams::blobs instead.
  88. @brief Set trained weights for LSTM layer.
  89. LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
  90. Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
  91. Than current output and current cell state is computed as follows:
  92. @f{eqnarray*}{
  93. h_t &= o_t \odot tanh(c_t), \\
  94. c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
  95. @f}
  96. where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned weights.
  97. Gates are computed as follows:
  98. @f{eqnarray*}{
  99. i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
  100. f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
  101. o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
  102. g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
  103. @f}
  104. where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  105. @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  106. For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
  107. (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
  108. The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
  109. and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.
  110. @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$)
  111. @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$)
  112. @param b is bias vector (i.e. according to above mentioned notation is @f$ b @f$)
  113. */
  114. CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
  115. /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
  116. * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
  117. * where `Wh` is parameter from setWeights().
  118. */
  119. virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
  120. /** @deprecated Use flag `produce_cell_output` in LayerParams.
  121. * @brief Specifies either interpret first dimension of input blob as timestamp dimension either as sample.
  122. *
  123. * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
  124. * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
  125. *
  126. * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
  127. * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
  128. */
  129. CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
  130. /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
  131. * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
  132. * @details Shape of the second output is the same as first output.
  133. */
  134. CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
  135. /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
  136. * @param input should contain packed values @f$x_t@f$
  137. * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
  138. *
  139. * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
  140. * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
  141. *
  142. * If setUseTimstampsDim() is set to false then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
  143. * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
  144. */
  145. int inputNameToIndex(String inputName) CV_OVERRIDE;
  146. int outputNameToIndex(const String& outputName) CV_OVERRIDE;
  147. };
  148. /** @brief GRU recurrent one-layer
  149. *
  150. * Accepts input sequence and computes the final hidden state for each element in the batch.
  151. *
  152. * - input[0] containing the features of the input sequence.
  153. * input[0] should have shape [`T`, `N`, `data_dims`] where `T` is sequence length, `N` is batch size, `data_dims` is input size
  154. * - output would have shape [`T`, `N`, `D` * `hidden_size`] where `D = 2` if layer is bidirectional otherwise `D = 1`
  155. *
  156. * Depends on the following attributes:
  157. * - hidden_size - Number of neurons in the hidden layer
  158. * - direction - RNN could be bidirectional or forward
  159. *
  160. * The final hidden state @f$ h_t @f$ computes by the following formulas:
  161. *
  162. @f{eqnarray*}{
  163. r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
  164. z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
  165. n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\
  166. h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)} \\
  167. @f}
  168. * Where @f$x_t@f$ is current input, @f$h_{(t-1)}@f$ is previous or initial hidden state.
  169. *
  170. * @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  171. * @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  172. *
  173. * @f$\odot@f$ is per-element multiply operation.
  174. */
  175. class CV_EXPORTS GRULayer : public Layer
  176. {
  177. public:
  178. /** Creates instance of GRU layer */
  179. static Ptr<GRULayer> create(const LayerParams& params);
  180. };
  181. /** @brief Classical recurrent layer
  182. Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
  183. - input: should contain packed input @f$x_t@f$.
  184. - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
  185. input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
  186. output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
  187. If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
  188. */
  189. class CV_EXPORTS RNNLayer : public Layer
  190. {
  191. public:
  192. /** Creates instance of RNNLayer */
  193. static Ptr<RNNLayer> create(const LayerParams& params);
  194. /** Setups learned weights.
  195. Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
  196. @f{eqnarray*}{
  197. h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
  198. o_t &= tanh&(W_{ho} h_t + b_o),
  199. @f}
  200. @param Wxh is @f$ W_{xh} @f$ matrix
  201. @param bh is @f$ b_{h} @f$ vector
  202. @param Whh is @f$ W_{hh} @f$ matrix
  203. @param Who is @f$ W_{xo} @f$ matrix
  204. @param bo is @f$ b_{o} @f$ vector
  205. */
  206. virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
  207. /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
  208. * @details Shape of the second output is the same as first output.
  209. */
  210. virtual void setProduceHiddenOutput(bool produce = false) = 0;
  211. };
  212. class CV_EXPORTS BaseConvolutionLayer : public Layer
  213. {
  214. public:
  215. CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;
  216. std::vector<size_t> adjust_pads;
  217. std::vector<size_t> kernel_size, strides, dilations;
  218. std::vector<size_t> pads_begin, pads_end;
  219. String padMode;
  220. int numOutput;
  221. };
  222. class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
  223. {
  224. public:
  225. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  226. bool fusedActivation = false;
  227. bool fusedAdd = false;
  228. bool useWinograd = false; // Flag whether to use Winograd to speed up 3x3 convolution.
  229. };
  230. class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
  231. {
  232. public:
  233. int input_zp, output_zp;
  234. float input_sc, output_sc;
  235. // quantization type flag. The perChannel default is true, that means it contains the parameters
  236. // of per-Channel quantization. Otherwise, that means this layer contains per-Tensor quantized parameters.
  237. bool per_channel;
  238. bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.
  239. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  240. };
  241. class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
  242. {
  243. public:
  244. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  245. };
  246. class CV_EXPORTS LRNLayer : public Layer
  247. {
  248. public:
  249. int type;
  250. int size;
  251. float alpha, beta, bias;
  252. bool normBySize;
  253. static Ptr<LRNLayer> create(const LayerParams& params);
  254. };
  255. /** @brief ArgMax/ArgMin layer
  256. * @note returns indices as floats, which means the supported range is [-2^24; 2^24]
  257. */
  258. class CV_EXPORTS ArgLayer : public Layer
  259. {
  260. public:
  261. static Ptr<ArgLayer> create(const LayerParams& params);
  262. };
  263. /** @brief Gather layer
  264. */
  265. class CV_EXPORTS GatherLayer : public Layer
  266. {
  267. public:
  268. static Ptr<GatherLayer> create(const LayerParams& params);
  269. };
  270. class CV_EXPORTS PoolingLayer : public Layer
  271. {
  272. public:
  273. int type;
  274. std::vector<size_t> kernel_size, strides;
  275. std::vector<size_t> pads_begin, pads_end;
  276. bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
  277. std::vector<bool> isGlobalPooling;
  278. bool computeMaxIdx;
  279. String padMode;
  280. bool ceilMode;
  281. // If true for average pooling with padding, divide an every output region
  282. // by a whole kernel area. Otherwise exclude zero padded values and divide
  283. // by number of real values.
  284. bool avePoolPaddedArea;
  285. // ROIPooling parameters.
  286. Size pooledSize;
  287. float spatialScale;
  288. // PSROIPooling parameters.
  289. int psRoiOutChannels;
  290. static Ptr<PoolingLayer> create(const LayerParams& params);
  291. };
  292. class CV_EXPORTS PoolingLayerInt8 : public PoolingLayer
  293. {
  294. public:
  295. int input_zp, output_zp;
  296. float input_sc, output_sc;
  297. static Ptr<PoolingLayerInt8> create(const LayerParams& params);
  298. };
  299. class CV_EXPORTS ReduceLayer : public Layer
  300. {
  301. public:
  302. static Ptr<ReduceLayer> create(const LayerParams& params);
  303. };
  304. class CV_EXPORTS SoftmaxLayer : public Layer
  305. {
  306. public:
  307. bool logSoftMax;
  308. static Ptr<SoftmaxLayer> create(const LayerParams& params);
  309. };
  310. class CV_EXPORTS SoftmaxLayerInt8 : public SoftmaxLayer
  311. {
  312. public:
  313. float output_sc;
  314. int output_zp;
  315. static Ptr<SoftmaxLayerInt8> create(const LayerParams& params);
  316. };
  317. /**
  318. * `InnerProduct`, `MatMul` and `Gemm` operations are all implemented by Fully Connected Layer.
  319. * Parameter `is_matmul` is used to distinguish `MatMul` and `Gemm` from `InnerProduct`.
  320. */
  321. class CV_EXPORTS InnerProductLayer : public Layer
  322. {
  323. public:
  324. int axis;
  325. static Ptr<InnerProductLayer> create(const LayerParams& params);
  326. };
  327. class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer
  328. {
  329. public:
  330. int input_zp, output_zp;
  331. float input_sc, output_sc;
  332. // quantization type flag. The perChannel default is true, that means it contains the parameters
  333. // of per-Channel quantization. Otherwise, that means this layer contains per-Tensor quantized parameters.
  334. bool per_channel;
  335. static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
  336. };
  337. class CV_EXPORTS MVNLayer : public Layer
  338. {
  339. public:
  340. float eps;
  341. bool normVariance, acrossChannels;
  342. static Ptr<MVNLayer> create(const LayerParams& params);
  343. };
  344. /* Reshaping */
  345. class CV_EXPORTS ReshapeLayer : public Layer
  346. {
  347. public:
  348. MatShape newShapeDesc;
  349. Range newShapeRange;
  350. static Ptr<ReshapeLayer> create(const LayerParams& params);
  351. };
  352. class CV_EXPORTS FlattenLayer : public Layer
  353. {
  354. public:
  355. static Ptr<FlattenLayer> create(const LayerParams &params);
  356. };
  357. class CV_EXPORTS QuantizeLayer : public Layer
  358. {
  359. public:
  360. std::vector<float> scales;
  361. std::vector<int> zeropoints;
  362. static Ptr<QuantizeLayer> create(const LayerParams &params);
  363. };
  364. class CV_EXPORTS DequantizeLayer : public Layer
  365. {
  366. public:
  367. std::vector<float> scales;
  368. std::vector<int> zeropoints;
  369. static Ptr<DequantizeLayer> create(const LayerParams &params);
  370. };
  371. class CV_EXPORTS RequantizeLayer : public Layer
  372. {
  373. public:
  374. float scale, shift;
  375. static Ptr<RequantizeLayer> create(const LayerParams &params);
  376. };
  377. class CV_EXPORTS ConcatLayer : public Layer
  378. {
  379. public:
  380. int axis;
  381. /**
  382. * @brief Add zero padding in case of concatenation of blobs with different
  383. * spatial sizes.
  384. *
  385. * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
  386. */
  387. bool padding;
  388. int paddingValue;
  389. static Ptr<ConcatLayer> create(const LayerParams &params);
  390. };
  391. class CV_EXPORTS SplitLayer : public Layer
  392. {
  393. public:
  394. int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
  395. static Ptr<SplitLayer> create(const LayerParams &params);
  396. };
  397. /**
  398. * Slice layer has several modes:
  399. * 1. Caffe mode
  400. * @param[in] axis Axis of split operation
  401. * @param[in] slice_point Array of split points
  402. *
  403. * Number of output blobs equals to number of split points plus one. The
  404. * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
  405. * the second output blob is a slice of input from @p slice_point[0] to
  406. * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
  407. * input from @p slice_point[-1] up to the end of @p axis size.
  408. *
  409. * 2. TensorFlow mode
  410. * @param begin Vector of start indices
  411. * @param size Vector of sizes
  412. *
  413. * More convenient numpy-like slice. One and only output blob
  414. * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
  415. *
  416. * 3. Torch mode
  417. * @param axis Axis of split operation
  418. *
  419. * Split input blob on the equal parts by @p axis.
  420. */
  421. class CV_EXPORTS SliceLayer : public Layer
  422. {
  423. public:
  424. /**
  425. * @brief Vector of slice ranges.
  426. *
  427. * The first dimension equals number of output blobs.
  428. * Inner vector has slice ranges for the first number of input dimensions.
  429. */
  430. std::vector<std::vector<Range> > sliceRanges;
  431. std::vector<std::vector<int> > sliceSteps;
  432. int axis;
  433. int num_split;
  434. static Ptr<SliceLayer> create(const LayerParams &params);
  435. };
  436. class CV_EXPORTS PermuteLayer : public Layer
  437. {
  438. public:
  439. static Ptr<PermuteLayer> create(const LayerParams& params);
  440. };
  441. /**
  442. * Permute channels of 4-dimensional input blob.
  443. * @param group Number of groups to split input channels and pick in turns
  444. * into output blob.
  445. *
  446. * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
  447. * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
  448. * Read more at https://arxiv.org/pdf/1707.01083.pdf
  449. */
  450. class CV_EXPORTS ShuffleChannelLayer : public Layer
  451. {
  452. public:
  453. static Ptr<Layer> create(const LayerParams& params);
  454. int group;
  455. };
  456. /**
  457. * @brief Adds extra values for specific axes.
  458. * @param paddings Vector of paddings in format
  459. * @code
  460. * [ pad_before, pad_after, // [0]th dimension
  461. * pad_before, pad_after, // [1]st dimension
  462. * ...
  463. * pad_before, pad_after ] // [n]th dimension
  464. * @endcode
  465. * that represents number of padded values at every dimension
  466. * starting from the first one. The rest of dimensions won't
  467. * be padded.
  468. * @param value Value to be padded. Defaults to zero.
  469. * @param type Padding type: 'constant', 'reflect'
  470. * @param input_dims Torch's parameter. If @p input_dims is not equal to the
  471. * actual input dimensionality then the `[0]th` dimension
  472. * is considered as a batch dimension and @p paddings are shifted
  473. * to a one dimension. Defaults to `-1` that means padding
  474. * corresponding to @p paddings.
  475. */
  476. class CV_EXPORTS PaddingLayer : public Layer
  477. {
  478. public:
  479. static Ptr<PaddingLayer> create(const LayerParams& params);
  480. };
  481. /* Activations */
  482. class CV_EXPORTS ActivationLayer : public Layer
  483. {
  484. public:
  485. virtual void forwardSlice(const float* src, float* dst, int len,
  486. size_t outPlaneSize, int cn0, int cn1) const {};
  487. virtual void forwardSlice(const int* src, const int* lut, int* dst, int len,
  488. size_t outPlaneSize, int cn0, int cn1) const {};
  489. virtual void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len,
  490. size_t outPlaneSize, int cn0, int cn1) const {};
  491. };
  492. class CV_EXPORTS ReLULayer : public ActivationLayer
  493. {
  494. public:
  495. float negativeSlope;
  496. static Ptr<ReLULayer> create(const LayerParams &params);
  497. };
  498. class CV_EXPORTS ReLU6Layer : public ActivationLayer
  499. {
  500. public:
  501. float minValue, maxValue;
  502. static Ptr<ReLU6Layer> create(const LayerParams &params);
  503. };
  504. class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
  505. {
  506. public:
  507. static Ptr<Layer> create(const LayerParams& params);
  508. };
  509. class CV_EXPORTS ELULayer : public ActivationLayer
  510. {
  511. public:
  512. float alpha;
  513. static Ptr<ELULayer> create(const LayerParams &params);
  514. };
  515. class CV_EXPORTS TanHLayer : public ActivationLayer
  516. {
  517. public:
  518. static Ptr<TanHLayer> create(const LayerParams &params);
  519. };
  520. class CV_EXPORTS SwishLayer : public ActivationLayer
  521. {
  522. public:
  523. static Ptr<SwishLayer> create(const LayerParams &params);
  524. };
  525. class CV_EXPORTS MishLayer : public ActivationLayer
  526. {
  527. public:
  528. static Ptr<MishLayer> create(const LayerParams &params);
  529. };
  530. class CV_EXPORTS SigmoidLayer : public ActivationLayer
  531. {
  532. public:
  533. static Ptr<SigmoidLayer> create(const LayerParams &params);
  534. };
  535. class CV_EXPORTS BNLLLayer : public ActivationLayer
  536. {
  537. public:
  538. static Ptr<BNLLLayer> create(const LayerParams &params);
  539. };
  540. class CV_EXPORTS AbsLayer : public ActivationLayer
  541. {
  542. public:
  543. static Ptr<AbsLayer> create(const LayerParams &params);
  544. };
  545. class CV_EXPORTS PowerLayer : public ActivationLayer
  546. {
  547. public:
  548. float power, scale, shift;
  549. static Ptr<PowerLayer> create(const LayerParams &params);
  550. };
  551. class CV_EXPORTS ExpLayer : public ActivationLayer
  552. {
  553. public:
  554. float base, scale, shift;
  555. static Ptr<ExpLayer> create(const LayerParams &params);
  556. };
  557. class CV_EXPORTS CeilLayer : public ActivationLayer
  558. {
  559. public:
  560. static Ptr<CeilLayer> create(const LayerParams &params);
  561. };
  562. class CV_EXPORTS FloorLayer : public ActivationLayer
  563. {
  564. public:
  565. static Ptr<FloorLayer> create(const LayerParams &params);
  566. };
  567. class CV_EXPORTS LogLayer : public ActivationLayer
  568. {
  569. public:
  570. static Ptr<LogLayer> create(const LayerParams &params);
  571. };
  572. class CV_EXPORTS RoundLayer : public ActivationLayer
  573. {
  574. public:
  575. static Ptr<RoundLayer> create(const LayerParams &params);
  576. };
  577. class CV_EXPORTS SqrtLayer : public ActivationLayer
  578. {
  579. public:
  580. static Ptr<SqrtLayer> create(const LayerParams &params);
  581. };
  582. class CV_EXPORTS NotLayer : public ActivationLayer
  583. {
  584. public:
  585. static Ptr<NotLayer> create(const LayerParams &params);
  586. };
  587. class CV_EXPORTS AcosLayer : public ActivationLayer
  588. {
  589. public:
  590. static Ptr<AcosLayer> create(const LayerParams &params);
  591. };
  592. class CV_EXPORTS AcoshLayer : public ActivationLayer
  593. {
  594. public:
  595. static Ptr<AcoshLayer> create(const LayerParams &params);
  596. };
  597. class CV_EXPORTS AsinLayer : public ActivationLayer
  598. {
  599. public:
  600. static Ptr<AsinLayer> create(const LayerParams &params);
  601. };
  602. class CV_EXPORTS AsinhLayer : public ActivationLayer
  603. {
  604. public:
  605. static Ptr<AsinhLayer> create(const LayerParams &params);
  606. };
  607. class CV_EXPORTS AtanLayer : public ActivationLayer
  608. {
  609. public:
  610. static Ptr<AtanLayer> create(const LayerParams &params);
  611. };
  612. class CV_EXPORTS AtanhLayer : public ActivationLayer
  613. {
  614. public:
  615. static Ptr<AtanhLayer> create(const LayerParams &params);
  616. };
  617. class CV_EXPORTS CosLayer : public ActivationLayer
  618. {
  619. public:
  620. static Ptr<CosLayer> create(const LayerParams &params);
  621. };
  622. class CV_EXPORTS CoshLayer : public ActivationLayer
  623. {
  624. public:
  625. static Ptr<CoshLayer> create(const LayerParams &params);
  626. };
  627. class CV_EXPORTS ErfLayer : public ActivationLayer
  628. {
  629. public:
  630. static Ptr<ErfLayer> create(const LayerParams &params);
  631. };
  632. class CV_EXPORTS HardSwishLayer : public ActivationLayer
  633. {
  634. public:
  635. static Ptr<HardSwishLayer> create(const LayerParams &params);
  636. };
  637. class CV_EXPORTS SinLayer : public ActivationLayer
  638. {
  639. public:
  640. static Ptr<SinLayer> create(const LayerParams &params);
  641. };
  642. class CV_EXPORTS SinhLayer : public ActivationLayer
  643. {
  644. public:
  645. static Ptr<SinhLayer> create(const LayerParams &params);
  646. };
  647. class CV_EXPORTS SoftplusLayer : public ActivationLayer
  648. {
  649. public:
  650. static Ptr<SoftplusLayer> create(const LayerParams &params);
  651. };
  652. class CV_EXPORTS SoftsignLayer : public ActivationLayer
  653. {
  654. public:
  655. static Ptr<SoftsignLayer> create(const LayerParams &params);
  656. };
  657. class CV_EXPORTS TanLayer : public ActivationLayer
  658. {
  659. public:
  660. static Ptr<TanLayer> create(const LayerParams &params);
  661. };
  662. class CV_EXPORTS CeluLayer : public ActivationLayer
  663. {
  664. public:
  665. float alpha;
  666. static Ptr<CeluLayer> create(const LayerParams &params);
  667. };
  668. class CV_EXPORTS HardSigmoidLayer : public ActivationLayer
  669. {
  670. public:
  671. float alpha;
  672. float beta;
  673. static Ptr<HardSigmoidLayer> create(const LayerParams &params);
  674. };
  675. class CV_EXPORTS SeluLayer : public ActivationLayer
  676. {
  677. public:
  678. float alpha;
  679. float gamma;
  680. static Ptr<SeluLayer> create(const LayerParams &params);
  681. };
  682. class CV_EXPORTS GeluLayer : public ActivationLayer
  683. {
  684. public:
  685. static Ptr<GeluLayer> create(const LayerParams &params);
  686. };
  687. class CV_EXPORTS GeluApproximationLayer : public ActivationLayer
  688. {
  689. public:
  690. static Ptr<GeluApproximationLayer> create(const LayerParams &params);
  691. };
  692. class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer
  693. {
  694. public:
  695. float alpha;
  696. static Ptr<ThresholdedReluLayer> create(const LayerParams &params);
  697. };
  698. class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
  699. {
  700. public:
  701. static Ptr<ActivationLayerInt8> create(const LayerParams &params);
  702. };
  703. class CV_EXPORTS SignLayer : public ActivationLayer
  704. {
  705. public:
  706. static Ptr<SignLayer> create(const LayerParams &params);
  707. };
  708. class CV_EXPORTS ShrinkLayer : public ActivationLayer
  709. {
  710. public:
  711. float bias;
  712. float lambd;
  713. static Ptr<ShrinkLayer> create(const LayerParams &params);
  714. };
  715. class CV_EXPORTS ReciprocalLayer : public ActivationLayer
  716. {
  717. public:
  718. static Ptr<ReciprocalLayer> create(const LayerParams &params);
  719. };
  720. /* Layers used in semantic segmentation */
  721. class CV_EXPORTS CropLayer : public Layer
  722. {
  723. public:
  724. static Ptr<Layer> create(const LayerParams &params);
  725. };
  726. /** @brief Element wise operation on inputs
  727. Extra optional parameters:
  728. - "operation" as string. Values are "sum" (default), "prod", "max", "div", "min"
  729. - "coeff" as float array. Specify weights of inputs for SUM operation
  730. - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
  731. */
  732. class CV_EXPORTS EltwiseLayer : public Layer
  733. {
  734. public:
  735. static Ptr<EltwiseLayer> create(const LayerParams &params);
  736. };
  737. class CV_EXPORTS EltwiseLayerInt8 : public Layer
  738. {
  739. public:
  740. static Ptr<EltwiseLayerInt8> create(const LayerParams &params);
  741. };
  742. class CV_EXPORTS NaryEltwiseLayer : public Layer
  743. {
  744. public:
  745. static Ptr<NaryEltwiseLayer> create(const LayerParams &params);
  746. };
  747. class CV_EXPORTS BatchNormLayer : public ActivationLayer
  748. {
  749. public:
  750. bool hasWeights, hasBias;
  751. float epsilon;
  752. static Ptr<BatchNormLayer> create(const LayerParams &params);
  753. };
  754. class CV_EXPORTS BatchNormLayerInt8 : public BatchNormLayer
  755. {
  756. public:
  757. float input_sc, output_sc;
  758. int input_zp, output_zp;
  759. static Ptr<BatchNormLayerInt8> create(const LayerParams &params);
  760. };
  761. class CV_EXPORTS MaxUnpoolLayer : public Layer
  762. {
  763. public:
  764. Size poolKernel;
  765. Size poolPad;
  766. Size poolStride;
  767. static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
  768. };
  769. class CV_EXPORTS ScaleLayer : public Layer
  770. {
  771. public:
  772. bool hasBias;
  773. int axis;
  774. String mode;
  775. static Ptr<ScaleLayer> create(const LayerParams& params);
  776. };
  777. class CV_EXPORTS ScaleLayerInt8 : public ScaleLayer
  778. {
  779. public:
  780. float output_sc;
  781. int output_zp;
  782. static Ptr<ScaleLayerInt8> create(const LayerParams &params);
  783. };
  784. class CV_EXPORTS ShiftLayer : public Layer
  785. {
  786. public:
  787. static Ptr<Layer> create(const LayerParams& params);
  788. };
  789. class CV_EXPORTS ShiftLayerInt8 : public Layer
  790. {
  791. public:
  792. static Ptr<Layer> create(const LayerParams& params);
  793. };
  794. class CV_EXPORTS CompareLayer : public Layer
  795. {
  796. public:
  797. static Ptr<Layer> create(const LayerParams& params);
  798. };
  799. class CV_EXPORTS DataAugmentationLayer : public Layer
  800. {
  801. public:
  802. static Ptr<DataAugmentationLayer> create(const LayerParams& params);
  803. };
  804. class CV_EXPORTS CorrelationLayer : public Layer
  805. {
  806. public:
  807. static Ptr<CorrelationLayer> create(const LayerParams& params);
  808. };
  809. class CV_EXPORTS AccumLayer : public Layer
  810. {
  811. public:
  812. static Ptr<AccumLayer> create(const LayerParams& params);
  813. };
  814. class CV_EXPORTS FlowWarpLayer : public Layer
  815. {
  816. public:
  817. static Ptr<FlowWarpLayer> create(const LayerParams& params);
  818. };
  819. class CV_EXPORTS PriorBoxLayer : public Layer
  820. {
  821. public:
  822. static Ptr<PriorBoxLayer> create(const LayerParams& params);
  823. };
  824. class CV_EXPORTS ReorgLayer : public Layer
  825. {
  826. public:
  827. static Ptr<ReorgLayer> create(const LayerParams& params);
  828. };
  829. class CV_EXPORTS RegionLayer : public Layer
  830. {
  831. public:
  832. float nmsThreshold;
  833. static Ptr<RegionLayer> create(const LayerParams& params);
  834. };
  835. /**
  836. * @brief Detection output layer.
  837. *
  838. * The layer size is: @f$ (1 \times 1 \times N \times 7) @f$
  839. * where N is [keep_top_k] parameter multiplied by batch size. Each row is:
  840. * [image_id, label, confidence, xmin, ymin, xmax, ymax]
  841. * where image_id is the index of image input in the batch.
  842. */
  843. class CV_EXPORTS DetectionOutputLayer : public Layer
  844. {
  845. public:
  846. static Ptr<DetectionOutputLayer> create(const LayerParams& params);
  847. };
  848. /**
  849. * @brief \f$ L_p \f$ - normalization layer.
  850. * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ -
  851. * normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one.
  852. * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
  853. * @param across_spatial If true, normalize an input across all non-batch dimensions.
  854. * Otherwise normalize an every channel separately.
  855. *
  856. * Across spatial:
  857. * @f[
  858. * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
  859. * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
  860. * @f]
  861. *
  862. * Channel wise normalization:
  863. * @f[
  864. * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
  865. * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
  866. * @f]
  867. *
  868. * Where `x, y` - spatial coordinates, `c` - channel.
  869. *
  870. * An every sample in the batch is normalized separately. Optionally,
  871. * output is scaled by the trained parameters.
  872. */
  873. class CV_EXPORTS NormalizeBBoxLayer : public Layer
  874. {
  875. public:
  876. float pnorm, epsilon;
  877. CV_DEPRECATED_EXTERNAL bool acrossSpatial;
  878. static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
  879. };
  880. /**
  881. * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy.
  882. *
  883. * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
  884. */
  885. class CV_EXPORTS ResizeLayer : public Layer
  886. {
  887. public:
  888. static Ptr<ResizeLayer> create(const LayerParams& params);
  889. };
  890. /**
  891. * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public-ver2
  892. *
  893. * It differs from @ref ResizeLayer in output shape and resize scales computations.
  894. */
  895. class CV_EXPORTS InterpLayer : public Layer
  896. {
  897. public:
  898. static Ptr<Layer> create(const LayerParams& params);
  899. };
  900. class CV_EXPORTS ProposalLayer : public Layer
  901. {
  902. public:
  903. static Ptr<ProposalLayer> create(const LayerParams& params);
  904. };
  905. class CV_EXPORTS CropAndResizeLayer : public Layer
  906. {
  907. public:
  908. static Ptr<Layer> create(const LayerParams& params);
  909. };
  910. class CV_EXPORTS CumSumLayer : public Layer
  911. {
  912. public:
  913. int exclusive;
  914. int reverse;
  915. static Ptr<CumSumLayer> create(const LayerParams& params);
  916. };
  917. class CV_EXPORTS ScatterLayer : public Layer
  918. {
  919. public:
  920. static Ptr<ScatterLayer> create(const LayerParams& params);
  921. };
  922. class CV_EXPORTS ScatterNDLayer : public Layer
  923. {
  924. public:
  925. static Ptr<ScatterNDLayer> create(const LayerParams& params);
  926. };
  927. class CV_EXPORTS TileLayer : public Layer
  928. {
  929. public:
  930. static Ptr<TileLayer> create(const LayerParams& params);
  931. };
  932. class CV_EXPORTS LayerNormLayer : public Layer
  933. {
  934. public:
  935. bool hasBias;
  936. int axis;
  937. float epsilon;
  938. static Ptr<LayerNormLayer> create(const LayerParams& params);
  939. };
  940. //! @}
  941. //! @}
  942. CV__DNN_INLINE_NS_END
  943. }
  944. }
  945. #endif