Sacado Package Browser (Single Doxygen Collection)  Version of the Day
FadBLASUnitTests.hpp
Go to the documentation of this file.
1 // $Id$
2 // $Source$
3 // @HEADER
4 // ***********************************************************************
5 //
6 // Sacado Package
7 // Copyright (2006) Sandia Corporation
8 //
9 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // This library is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as
14 // published by the Free Software Foundation; either version 2.1 of the
15 // License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 // USA
26 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27 // (etphipp@sandia.gov).
28 //
29 // ***********************************************************************
30 // @HEADER
31 
32 #ifndef FADBLASUNITTESTS_HPP
33 #define FADBLASUNITTESTS_HPP
34 
35 // Sacado includes
36 #include "Sacado_No_Kokkos.hpp"
37 #include "Sacado_Fad_BLAS.hpp"
38 #include "Sacado_Random.hpp"
39 
40 // Cppunit includes
41 #include <cppunit/extensions/HelperMacros.h>
42 
43 #define COMPARE_VALUES(a, b) \
44  CPPUNIT_ASSERT( std::abs(a-b) < this->tol_a + this->tol_r*std::abs(a) );
45 
46 #define COMPARE_FADS(a, b) \
47 CPPUNIT_ASSERT(a.size() == b.size()); \
48 CPPUNIT_ASSERT(a.hasFastAccess() == b.hasFastAccess()); \
49 COMPARE_VALUES(a.val(), b.val()); \
50 for (int k=0; k<a.size(); k++) { \
51  COMPARE_VALUES(a.dx(k), b.dx(k)); \
52  COMPARE_VALUES(a.fastAccessDx(k), b.fastAccessDx(k)); \
53  } \
54  ;
55 
56 #define COMPARE_FAD_VECTORS(X1, X2, n) \
57  CPPUNIT_ASSERT(X1.size() == std::size_t(n)); \
58  CPPUNIT_ASSERT(X2.size() == std::size_t(n)); \
59  for (unsigned int i=0; i<n; i++) { \
60  COMPARE_FADS(X1[i], X2[i]); \
61  } \
62  ;
63 
64 // A class for testing differentiated BLAS operations for general Fad types
65 template <class FadType, class ScalarType>
66 class FadBLASUnitTests : public CppUnit::TestFixture {
67 
69 
71 
76 
81 
86 
91 
94 
104 
109 
117 
128 
138 
146 
154 
156 
157 public:
158 
160 
161  FadBLASUnitTests(int m, int n, int l, int ndot,
162  double absolute_tolerance, double relative_tolerance);
163 
164  void setUp();
165  void tearDown();
166 
167  void testSCAL1();
168  void testSCAL2();
169  void testSCAL3();
170  void testSCAL4();
171 
172  void testCOPY1();
173  void testCOPY2();
174  void testCOPY3();
175  void testCOPY4();
176 
177  void testAXPY1();
178  void testAXPY2();
179  void testAXPY3();
180  void testAXPY4();
181 
182  void testDOT1();
183  void testDOT2();
184  void testDOT3();
185  void testDOT4();
186 
187  void testNRM21();
188  void testNRM22();
189 
190  void testGEMV1();
191  void testGEMV2();
192  void testGEMV3();
193  void testGEMV4();
194  void testGEMV5();
195  void testGEMV6();
196  void testGEMV7();
197  void testGEMV8();
198  void testGEMV9();
199 
200  void testTRMV1();
201  void testTRMV2();
202  void testTRMV3();
203  void testTRMV4();
204 
205  void testGER1();
206  void testGER2();
207  void testGER3();
208  void testGER4();
209  void testGER5();
210  void testGER6();
211  void testGER7();
212 
213  void testGEMM1();
214  void testGEMM2();
215  void testGEMM3();
216  void testGEMM4();
217  void testGEMM5();
218  void testGEMM6();
219  void testGEMM7();
220  void testGEMM8();
221  void testGEMM9();
222  void testGEMM10();
223 
224  void testSYMM1();
225  void testSYMM2();
226  void testSYMM3();
227  void testSYMM4();
228  void testSYMM5();
229  void testSYMM6();
230  void testSYMM7();
231  void testSYMM8();
232  void testSYMM9();
233 
234  void testTRMM1();
235  void testTRMM2();
236  void testTRMM3();
237  void testTRMM4();
238  void testTRMM5();
239  void testTRMM6();
240  void testTRMM7();
241 
242  void testTRSM1();
243  void testTRSM2();
244  void testTRSM3();
245  void testTRSM4();
246  void testTRSM5();
247  void testTRSM6();
248  void testTRSM7();
249 
250 protected:
251 
252  // Random number generator
254 
255  // Real random number generator for derivative components
257 
258  // Number of matrix rows
259  unsigned int m;
260 
261  // Number of matrix columns
262  unsigned int n;
263 
264  // Number of matrix columns for level 3 blas
265  unsigned int l;
266 
267  // Number of derivative components
268  unsigned int ndot;
269 
270  // Tolerances to which fad objects should be the same
271  double tol_a, tol_r;
272 
273 }; // class FadBLASUnitTests
274 
275 template <class FadType, class ScalarType>
278  urand(), real_urand(), m(5), n(6), l(4), ndot(7), tol_a(1.0e-11), tol_r(1.0e-11) {}
279 
280 template <class FadType, class ScalarType>
282 FadBLASUnitTests(int m_, int n_, int l_, int ndot_, double absolute_tolerance,
283  double relative_tolerance) :
284  urand(),
285  real_urand(),
286  m(m_),
287  n(n_),
288  l(l_),
289  ndot(ndot_),
290  tol_a(absolute_tolerance),
291  tol_r(relative_tolerance) {}
292 
293 template <class FadType, class ScalarType>
295 setUp() {}
296 
297 template <class FadType, class ScalarType>
300 
301 // Tests all arguments
302 template <class FadType, class ScalarType>
303 void
306  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
307  for (unsigned int i=0; i<m; i++) {
308  ScalarType val = urand.number();
309  x1[i] = FadType(ndot, val);
310  x2[i] = FadType(ndot, val);
311  x3[i] = FadType(ndot, val);
312  for (unsigned int k=0; k<ndot; k++) {
313  val = urand.number();
314  x1[i].fastAccessDx(k) = val;
315  x2[i].fastAccessDx(k) = val;
316  x3[i].fastAccessDx(k) = val;
317  }
318  }
319  FadType alpha(ndot, urand.number());
320  for (unsigned int k=0; k<ndot; k++) {
321  alpha.fastAccessDx(k) = urand.number();
322  }
323 
324  Teuchos::BLAS<int,FadType> teuchos_blas;
325  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
326 
327  Teuchos::BLAS<int,FadType> sacado_blas(false);
328  sacado_blas.SCAL(m, alpha, &x2[0], 1);
329 
330  COMPARE_FAD_VECTORS(x1, x2, m);
331 
332  unsigned int sz = m*(1+ndot);
333  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
334  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
335 
336  COMPARE_FAD_VECTORS(x1, x3, m);
337 }
338 
339 // Tests non-unit inc
340 template <class FadType, class ScalarType>
341 void
344  unsigned int incx = 2;
345  VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
346  for (unsigned int i=0; i<m*incx; i++) {
347  ScalarType val = urand.number();
348  x1[i] = FadType(ndot, val);
349  x2[i] = FadType(ndot, val);
350  x3[i] = FadType(ndot, val);
351  for (unsigned int k=0; k<ndot; k++) {
352  val = urand.number();
353  x1[i].fastAccessDx(k) = val;
354  x2[i].fastAccessDx(k) = val;
355  x3[i].fastAccessDx(k) = val;
356  }
357  }
358  FadType alpha(ndot, urand.number());
359  for (unsigned int k=0; k<ndot; k++) {
360  alpha.fastAccessDx(k) = urand.number();
361  }
362 
363  Teuchos::BLAS<int,FadType> teuchos_blas;
364  teuchos_blas.SCAL(m, alpha, &x1[0], incx);
365 
366  Teuchos::BLAS<int,FadType> sacado_blas(false);
367  sacado_blas.SCAL(m, alpha, &x2[0], incx);
368 
369  COMPARE_FAD_VECTORS(x1, x2, m*incx);
370 
371  unsigned int sz = m*(1+ndot);
372  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
373  sacado_blas2.SCAL(m, alpha, &x3[0], incx);
374 
375  COMPARE_FAD_VECTORS(x1, x3, m*incx);
376 }
377 
378 // Tests constant alpha
379 template <class FadType, class ScalarType>
380 void
383  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
384  for (unsigned int i=0; i<m; i++) {
385  ScalarType val = urand.number();
386  x1[i] = FadType(ndot, val);
387  x2[i] = FadType(ndot, val);
388  x3[i] = FadType(ndot, val);
389  for (unsigned int k=0; k<ndot; k++) {
390  val = urand.number();
391  x1[i].fastAccessDx(k) = val;
392  x2[i].fastAccessDx(k) = val;
393  x3[i].fastAccessDx(k) = val;
394  }
395  }
396  ScalarType alpha = urand.number();
397 
398  Teuchos::BLAS<int,FadType> teuchos_blas;
399  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
400 
401  Teuchos::BLAS<int,FadType> sacado_blas(false);
402  sacado_blas.SCAL(m, alpha, &x2[0], 1);
403 
404  COMPARE_FAD_VECTORS(x1, x2, m);
405 
406  unsigned int sz = m*(1+ndot);
407  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
408  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
409 
410  COMPARE_FAD_VECTORS(x1, x3, m);
411 }
412 
413 // Tests constant x
414 template <class FadType, class ScalarType>
415 void
418  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
419  for (unsigned int i=0; i<m; i++) {
420  ScalarType val = urand.number();
421  x1[i] = val;
422  x2[i] = val;
423  x3[i] = val;
424  }
425  FadType alpha = FadType(ndot, urand.number());
426  for (unsigned int k=0; k<ndot; k++)
427  alpha.fastAccessDx(k) = urand.number();
428 
429  Teuchos::BLAS<int,FadType> teuchos_blas;
430  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
431 
432  Teuchos::BLAS<int,FadType> sacado_blas(false);
433  sacado_blas.SCAL(m, alpha, &x2[0], 1);
434 
435  COMPARE_FAD_VECTORS(x1, x2, m);
436 
437  unsigned int sz = m*(1+ndot);
438  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
439  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
440 
441  COMPARE_FAD_VECTORS(x1, x3, m);
442 }
443 
444 // Tests all arguments
445 template <class FadType, class ScalarType>
446 void
449  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
450  for (unsigned int i=0; i<m; i++) {
451  x[i] = FadType(ndot, urand.number());
452  ScalarType val = urand.number();
453  y1[i] = FadType(ndot, val);
454  y2[i] = FadType(ndot, val);
455  y3[i] = FadType(ndot, val);
456  for (unsigned int k=0; k<ndot; k++) {
457  x[i].fastAccessDx(k) = urand.number();
458  val = urand.number();
459  y1[i].fastAccessDx(k) = val;
460  y2[i].fastAccessDx(k) = val;
461  y3[i].fastAccessDx(k) = val;
462  }
463  }
464 
465  Teuchos::BLAS<int,FadType> teuchos_blas;
466  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
467 
468  Teuchos::BLAS<int,FadType> sacado_blas(false);
469  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
470 
471  COMPARE_FAD_VECTORS(y1, y2, m);
472 
473  unsigned int sz = 2*m*(1+ndot);
474  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
475  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
476 
477  COMPARE_FAD_VECTORS(y1, y3, m);
478 }
479 
480 // Tests non unit inc
481 template <class FadType, class ScalarType>
482 void
485  unsigned int incx = 2;
486  unsigned int incy = 3;
487  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
488  for (unsigned int i=0; i<m*incx; i++) {
489  x[i] = FadType(ndot, urand.number());
490  for (unsigned int k=0; k<ndot; k++) {
491  x[i].fastAccessDx(k) = urand.number();
492  }
493  }
494  for (unsigned int i=0; i<m*incy; i++) {
495  ScalarType val = urand.number();
496  y1[i] = FadType(ndot, val);
497  y2[i] = FadType(ndot, val);
498  y3[i] = FadType(ndot, val);
499  for (unsigned int k=0; k<ndot; k++) {
500  val = urand.number();
501  y1[i].fastAccessDx(k) = val;
502  y2[i].fastAccessDx(k) = val;
503  y3[i].fastAccessDx(k) = val;
504  }
505  }
506 
507  Teuchos::BLAS<int,FadType> teuchos_blas;
508  teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
509 
510  Teuchos::BLAS<int,FadType> sacado_blas(false);
511  sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
512 
513  COMPARE_FAD_VECTORS(y1, y2, m*incy);
514 
515  unsigned int sz = 2*m*(1+ndot);
516  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
517  sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
518 
519  COMPARE_FAD_VECTORS(y1, y3, m*incy);
520 }
521 
522 // Tests x constant
523 template <class FadType, class ScalarType>
524 void
527  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
528  for (unsigned int i=0; i<m; i++) {
529  x[i] = urand.number();
530  }
531  for (unsigned int i=0; i<m; i++) {
532  ScalarType val = urand.number();
533  y1[i] = FadType(ndot, val);
534  y2[i] = FadType(ndot, val);
535  y3[i] = FadType(ndot, val);
536  for (unsigned int k=0; k<ndot; k++) {
537  val = urand.number();
538  y1[i].fastAccessDx(k) = val;
539  y2[i].fastAccessDx(k) = val;
540  y3[i].fastAccessDx(k) = val;
541  }
542  }
543 
544  Teuchos::BLAS<int,FadType> teuchos_blas;
545  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
546 
547  Teuchos::BLAS<int,FadType> sacado_blas(false);
548  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
549 
550  COMPARE_FAD_VECTORS(y1, y2, m);
551 
552  unsigned int sz = 2*m*(1+ndot);
553  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
554  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
555 
556  COMPARE_FAD_VECTORS(y1, y3, m);
557 }
558 
559 // Tests y constant
560 template <class FadType, class ScalarType>
561 void
564  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
565  for (unsigned int i=0; i<m; i++) {
566  x[i] = FadType(ndot, urand.number());
567  ScalarType val = urand.number();
568  y1[i] = val;
569  y2[i] = val;
570  y3[i] = val;
571  for (unsigned int k=0; k<ndot; k++) {
572  x[i].fastAccessDx(k) = urand.number();
573  }
574  }
575 
576  Teuchos::BLAS<int,FadType> teuchos_blas;
577  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
578 
579  Teuchos::BLAS<int,FadType> sacado_blas(false);
580  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
581 
582  COMPARE_FAD_VECTORS(y1, y2, m);
583 
584  unsigned int sz = 2*m*(1+ndot);
585  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
586  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
587 
588  COMPARE_FAD_VECTORS(y1, y3, m);
589 }
590 
591 // Tests all arguments
592 template <class FadType, class ScalarType>
593 void
596  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
597  for (unsigned int i=0; i<m; i++) {
598  x[i] = FadType(ndot, urand.number());
599  ScalarType val = urand.number();
600  y1[i] = FadType(ndot, val);
601  y2[i] = FadType(ndot, val);
602  y3[i] = FadType(ndot, val);
603  for (unsigned int k=0; k<ndot; k++) {
604  x[i].fastAccessDx(k) = urand.number();
605  val = urand.number();
606  y1[i].fastAccessDx(k) = val;
607  y2[i].fastAccessDx(k) = val;
608  y3[i].fastAccessDx(k) = val;
609  }
610  }
611  FadType alpha(ndot, urand.number());
612  for (unsigned int k=0; k<ndot; k++)
613  alpha.fastAccessDx(k) = urand.number();
614 
615  Teuchos::BLAS<int,FadType> teuchos_blas;
616  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
617 
618  Teuchos::BLAS<int,FadType> sacado_blas(false);
619  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
620 
621  COMPARE_FAD_VECTORS(y1, y2, m);
622 
623  unsigned int sz = 2*m*(1+ndot);
624  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
625  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
626 
627  COMPARE_FAD_VECTORS(y1, y3, m);
628 }
629 
630 // Tests non unit inc
631 template <class FadType, class ScalarType>
632 void
635  unsigned int incx = 2;
636  unsigned int incy = 3;
637  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
638  for (unsigned int i=0; i<m*incx; i++) {
639  x[i] = FadType(ndot, urand.number());
640  for (unsigned int k=0; k<ndot; k++) {
641  x[i].fastAccessDx(k) = urand.number();
642  }
643  }
644  for (unsigned int i=0; i<m*incy; i++) {
645  ScalarType val = urand.number();
646  y1[i] = FadType(ndot, val);
647  y2[i] = FadType(ndot, val);
648  y3[i] = FadType(ndot, val);
649  for (unsigned int k=0; k<ndot; k++) {
650  val = urand.number();
651  y1[i].fastAccessDx(k) = val;
652  y2[i].fastAccessDx(k) = val;
653  y3[i].fastAccessDx(k) = val;
654  }
655  }
656  FadType alpha(ndot, urand.number());
657  for (unsigned int k=0; k<ndot; k++)
658  alpha.fastAccessDx(k) = urand.number();
659 
660  Teuchos::BLAS<int,FadType> teuchos_blas;
661  teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
662 
663  Teuchos::BLAS<int,FadType> sacado_blas(false);
664  sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
665 
666  COMPARE_FAD_VECTORS(y1, y2, m*incy);
667 
668  unsigned int sz = 2*m*(1+ndot);
669  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
670  sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
671 
672  COMPARE_FAD_VECTORS(y1, y3, m*incy);
673 }
674 
675 // Tests x constant
676 template <class FadType, class ScalarType>
677 void
680  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
681  std::vector<ScalarType> xx(m);
682  for (unsigned int i=0; i<m; i++) {
683  xx[i] = urand.number();
684  x[i] = xx[i];
685  ScalarType val = urand.number();
686  y1[i] = FadType(ndot, val);
687  y2[i] = FadType(ndot, val);
688  y3[i] = FadType(ndot, val);
689  y4[i] = FadType(ndot, val);
690  for (unsigned int k=0; k<ndot; k++) {
691  val = urand.number();
692  y1[i].fastAccessDx(k) = val;
693  y2[i].fastAccessDx(k) = val;
694  y3[i].fastAccessDx(k) = val;
695  y4[i].fastAccessDx(k) = val;
696  }
697  }
698  FadType alpha(ndot, urand.number());
699  for (unsigned int k=0; k<ndot; k++)
700  alpha.fastAccessDx(k) = urand.number();
701 
702  Teuchos::BLAS<int,FadType> teuchos_blas;
703  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
704 
705  Teuchos::BLAS<int,FadType> sacado_blas(false);
706  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
707 
708  COMPARE_FAD_VECTORS(y1, y2, m);
709 
710  unsigned int sz = m*(1+ndot)+m;
711  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
712  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
713 
714  COMPARE_FAD_VECTORS(y1, y3, m);
715 
716  sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
717 
718  COMPARE_FAD_VECTORS(y1, y4, m);
719 }
720 
721 // Tests y constant
722 template <class FadType, class ScalarType>
723 void
726  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
727  for (unsigned int i=0; i<m; i++) {
728  x[i] = FadType(ndot, urand.number());
729  ScalarType val = urand.number();
730  y1[i] = val;
731  y2[i] = val;
732  y3[i] = val;
733  for (unsigned int k=0; k<ndot; k++) {
734  x[i].fastAccessDx(k) = urand.number();
735  }
736  }
737  FadType alpha(ndot, urand.number());
738  for (unsigned int k=0; k<ndot; k++)
739  alpha.fastAccessDx(k) = urand.number();
740 
741  Teuchos::BLAS<int,FadType> teuchos_blas;
742  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
743 
744  Teuchos::BLAS<int,FadType> sacado_blas(false);
745  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
746 
747  COMPARE_FAD_VECTORS(y1, y2, m);
748 
749  unsigned int sz = 2*m*(1+ndot);
750  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
751  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
752 
753  COMPARE_FAD_VECTORS(y1, y3, m);
754 }
755 
756 // Tests all arguments
757 template <class FadType, class ScalarType>
758 void
761  VectorType X(m,ndot), Y(m,ndot);
762  for (unsigned int i=0; i<m; i++) {
763  X[i] = FadType(ndot, real_urand.number());
764  Y[i] = FadType(ndot, real_urand.number());
765  for (unsigned int k=0; k<ndot; k++) {
766  X[i].fastAccessDx(k) = real_urand.number();
767  Y[i].fastAccessDx(k) = real_urand.number();
768  }
769  }
770 
771  Teuchos::BLAS<int,FadType> teuchos_blas;
772  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
773 
774  Teuchos::BLAS<int,FadType> sacado_blas(false);
775  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
776 
777  COMPARE_FADS(z1, z2);
778 
779  unsigned int sz = 2*m*(1+ndot);
780  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
781  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
782 
783  COMPARE_FADS(z1, z3);
784 }
785 
786 // Tests non-unit inc
787 template <class FadType, class ScalarType>
788 void
791  unsigned int incx = 2;
792  unsigned int incy = 3;
793  VectorType X(m*incx,ndot), Y(m*incy,ndot);
794  for (unsigned int i=0; i<m*incx; i++) {
795  X[i] = FadType(ndot, real_urand.number());
796  for (unsigned int k=0; k<ndot; k++) {
797  X[i].fastAccessDx(k) = real_urand.number();
798  }
799  }
800  for (unsigned int i=0; i<m*incy; i++) {
801  Y[i] = FadType(ndot, real_urand.number());
802  for (unsigned int k=0; k<ndot; k++) {
803  Y[i].fastAccessDx(k) = real_urand.number();
804  }
805  }
806 
807  Teuchos::BLAS<int,FadType> teuchos_blas;
808  FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
809 
810  Teuchos::BLAS<int,FadType> sacado_blas(false);
811  FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
812 
813  COMPARE_FADS(z1, z2);
814 
815  unsigned int sz = 2*m*(1+ndot);
816  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
817  FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
818 
819  COMPARE_FADS(z1, z3);
820 }
821 
822 // Tests X constant
823 template <class FadType, class ScalarType>
824 void
827  VectorType X(m,0), Y(m,ndot);
828  std::vector<ScalarType> x(m);
829  for (unsigned int i=0; i<m; i++) {
830  x[i] = urand.number();
831  X[i] = x[i];
832  Y[i] = FadType(ndot, real_urand.number());
833  for (unsigned int k=0; k<ndot; k++) {
834  Y[i].fastAccessDx(k) = real_urand.number();
835  }
836  }
837 
838  Teuchos::BLAS<int,FadType> teuchos_blas;
839  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
840 
841  Teuchos::BLAS<int,FadType> sacado_blas(false);
842  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
843 
844  COMPARE_FADS(z1, z2);
845 
846  unsigned int sz = 2*m*(1+ndot);
847  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
848  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
849 
850  COMPARE_FADS(z1, z3);
851 
852  FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
853 
854  COMPARE_FADS(z1, z4);
855 }
856 
857 // Tests Y constant
858 template <class FadType, class ScalarType>
859 void
862  VectorType X(m,ndot), Y(m,0);
863  std::vector<ScalarType> y(m);
864  for (unsigned int i=0; i<m; i++) {
865  X[i] = FadType(ndot, real_urand.number());
866  y[i] = urand.number();
867  Y[i] = y[i];
868  for (unsigned int k=0; k<ndot; k++) {
869  X[i].fastAccessDx(k) = real_urand.number();
870  }
871  }
872 
873  Teuchos::BLAS<int,FadType> teuchos_blas;
874  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
875 
876  Teuchos::BLAS<int,FadType> sacado_blas(false);
877  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
878 
879  COMPARE_FADS(z1, z2);
880 
881  unsigned int sz = 2*m*(1+ndot);
882  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
883  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
884 
885  COMPARE_FADS(z1, z3);
886 
887  FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
888 
889  COMPARE_FADS(z1, z4);
890 }
891 
892 // Tests all arguments
893 template <class FadType, class ScalarType>
894 void
897  VectorType X(m,ndot);
898  for (unsigned int i=0; i<m; i++) {
899  X[i] = FadType(ndot, real_urand.number());
900  for (unsigned int k=0; k<ndot; k++) {
901  X[i].fastAccessDx(k) = real_urand.number();
902  }
903  }
904 
905  Teuchos::BLAS<int,FadType> teuchos_blas;
907  teuchos_blas.NRM2(m, &X[0], 1);
908 
909  Teuchos::BLAS<int,FadType> sacado_blas(false);
911  sacado_blas.NRM2(m, &X[0], 1);
912 
913  COMPARE_FADS(z1, z2);
914 
915  unsigned int sz = m*(1+ndot);
916  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
918  sacado_blas2.NRM2(m, &X[0], 1);
919 
920  COMPARE_FADS(z1, z3);
921 }
922 
923 // Tests non-unit inc
924 template <class FadType, class ScalarType>
925 void
928  unsigned int incx = 2;
929  VectorType X(m*incx,ndot);
930  for (unsigned int i=0; i<m*incx; i++) {
931  X[i] = FadType(ndot, real_urand.number());
932  for (unsigned int k=0; k<ndot; k++) {
933  X[i].fastAccessDx(k) = real_urand.number();
934  }
935  }
936 
937  Teuchos::BLAS<int,FadType> teuchos_blas;
939  teuchos_blas.NRM2(m, &X[0], incx);
940 
941  Teuchos::BLAS<int,FadType> sacado_blas(false);
943  sacado_blas.NRM2(m, &X[0], incx);
944 
945  COMPARE_FADS(z1, z2);
946 
947  unsigned int sz = m*(1+ndot);
948  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
950  sacado_blas2.NRM2(m, &X[0], incx);
951 
952  COMPARE_FADS(z1, z3);
953 }
954 
955 // Tests all arguments
956 template <class FadType, class ScalarType>
957 void
960  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
961  for (unsigned int j=0; j<n; j++) {
962  for (unsigned int i=0; i<m; i++) {
963  A[i+j*m] = FadType(ndot, urand.number());
964  for (unsigned int k=0; k<ndot; k++)
965  A[i+j*m].fastAccessDx(k) = urand.number();
966  }
967  B[j] = FadType(ndot, urand.number());
968  for (unsigned int k=0; k<ndot; k++)
969  B[j].fastAccessDx(k) = urand.number();
970  }
971  FadType alpha(ndot, urand.number());
972  FadType beta(ndot, urand.number());
973  for (unsigned int k=0; k<ndot; k++) {
974  alpha.fastAccessDx(k) = urand.number();
975  beta.fastAccessDx(k) = urand.number();
976  }
977 
978  for (unsigned int i=0; i<m; i++) {
979  ScalarType val = urand.number();
980  C1[i] = FadType(ndot, val);
981  C2[i] = FadType(ndot, val);
982  C3[i] = FadType(ndot, val);
983  for (unsigned int k=0; k<ndot; k++) {
984  val = urand.number();
985  C1[i].fastAccessDx(k) = val;
986  C2[i].fastAccessDx(k) = val;
987  C3[i].fastAccessDx(k) = val;
988  }
989  }
990 
991  Teuchos::BLAS<int,FadType> teuchos_blas;
992  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
993  beta, &C1[0], 1);
994 
995  Teuchos::BLAS<int,FadType> sacado_blas(false);
996  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
997  beta, &C2[0], 1);
998 
999  COMPARE_FAD_VECTORS(C1, C2, m);
1000 
1001  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1002  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1003  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1004  beta, &C3[0], 1);
1005 
1006  COMPARE_FAD_VECTORS(C1, C3, m);
1007 }
1008 
1009 // Tests non-unit inc and different lda
1010 template <class FadType, class ScalarType>
1011 void
1014  unsigned int lda = m+3;
1015  unsigned int incb = 2;
1016  unsigned int incc = 3;
1017  VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
1018  C3(m*incc,ndot);
1019  for (unsigned int j=0; j<n; j++) {
1020  for (unsigned int i=0; i<lda; i++) {
1021  A[i+j*lda] = FadType(ndot, urand.number());
1022  for (unsigned int k=0; k<ndot; k++)
1023  A[i+j*lda].fastAccessDx(k) = urand.number();
1024  }
1025  }
1026  for (unsigned int j=0; j<n*incb; j++) {
1027  B[j] = FadType(ndot, urand.number());
1028  for (unsigned int k=0; k<ndot; k++)
1029  B[j].fastAccessDx(k) = urand.number();
1030  }
1031  FadType alpha(ndot, urand.number());
1032  FadType beta(ndot, urand.number());
1033  for (unsigned int k=0; k<ndot; k++) {
1034  alpha.fastAccessDx(k) = urand.number();
1035  beta.fastAccessDx(k) = urand.number();
1036  }
1037 
1038  for (unsigned int i=0; i<m*incc; i++) {
1039  ScalarType val = urand.number();
1040  C1[i] = FadType(ndot, val);
1041  C2[i] = FadType(ndot, val);
1042  C3[i] = FadType(ndot, val);
1043  for (unsigned int k=0; k<ndot; k++) {
1044  val = urand.number();
1045  C1[i].fastAccessDx(k) = val;
1046  C2[i].fastAccessDx(k) = val;
1047  C3[i].fastAccessDx(k) = val;
1048  }
1049  }
1050 
1051  Teuchos::BLAS<int,FadType> teuchos_blas;
1052  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1053  beta, &C1[0], incc);
1054 
1055  Teuchos::BLAS<int,FadType> sacado_blas(false);
1056  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1057  beta, &C2[0], incc);
1058 
1059  COMPARE_FAD_VECTORS(C1, C2, m*incc);
1060 
1061  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1062  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1063  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1064  beta, &C3[0], incc);
1065 
1066  COMPARE_FAD_VECTORS(C1, C3, m*incc);
1067 }
1068 
1069 // Tests transpose with all arguments
1070 template <class FadType, class ScalarType>
1071 void
1074  VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
1075  for (unsigned int j=0; j<n; j++) {
1076  for (unsigned int i=0; i<m; i++) {
1077  A[i+j*m] = FadType(ndot, urand.number());
1078  for (unsigned int k=0; k<ndot; k++)
1079  A[i+j*m].fastAccessDx(k) = urand.number();
1080  }
1081  }
1082  for (unsigned int j=0; j<m; j++) {
1083  B[j] = FadType(ndot, urand.number());
1084  for (unsigned int k=0; k<ndot; k++)
1085  B[j].fastAccessDx(k) = urand.number();
1086  }
1087  FadType alpha(ndot, urand.number());
1088  FadType beta(ndot, urand.number());
1089  for (unsigned int k=0; k<ndot; k++) {
1090  alpha.fastAccessDx(k) = urand.number();
1091  beta.fastAccessDx(k) = urand.number();
1092  }
1093 
1094  for (unsigned int i=0; i<n; i++) {
1095  ScalarType val = urand.number();
1096  C1[i] = FadType(ndot, val);
1097  C2[i] = FadType(ndot, val);
1098  C3[i] = FadType(ndot, val);
1099  for (unsigned int k=0; k<ndot; k++) {
1100  val = urand.number();
1101  C1[i].fastAccessDx(k) = val;
1102  C2[i].fastAccessDx(k) = val;
1103  C3[i].fastAccessDx(k) = val;
1104  }
1105  }
1106 
1107  Teuchos::BLAS<int,FadType> teuchos_blas;
1108  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1109  beta, &C1[0], 1);
1110 
1111  Teuchos::BLAS<int,FadType> sacado_blas(false);
1112  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1113  beta, &C2[0], 1);
1114 
1115  COMPARE_FAD_VECTORS(C1, C2, n);
1116 
1117  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1118  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1119  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1120  beta, &C3[0], 1);
1121 
1122  COMPARE_FAD_VECTORS(C1, C3, n);
1123 }
1124 
1125 // Tests transpose with non-unit inc and different lda
1126 template <class FadType, class ScalarType>
1127 void
1130  unsigned int lda = m+3;
1131  unsigned int incb = 2;
1132  unsigned int incc = 3;
1133  VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
1134  C3(n*incc,ndot);
1135  for (unsigned int j=0; j<n; j++) {
1136  for (unsigned int i=0; i<lda; i++) {
1137  A[i+j*lda] = FadType(ndot, urand.number());
1138  for (unsigned int k=0; k<ndot; k++)
1139  A[i+j*lda].fastAccessDx(k) = urand.number();
1140  }
1141  }
1142  for (unsigned int j=0; j<m*incb; j++) {
1143  B[j] = FadType(ndot, urand.number());
1144  for (unsigned int k=0; k<ndot; k++)
1145  B[j].fastAccessDx(k) = urand.number();
1146  }
1147  FadType alpha(ndot, urand.number());
1148  FadType beta(ndot, urand.number());
1149  for (unsigned int k=0; k<ndot; k++) {
1150  alpha.fastAccessDx(k) = urand.number();
1151  beta.fastAccessDx(k) = urand.number();
1152  }
1153 
1154  for (unsigned int i=0; i<n*incc; i++) {
1155  ScalarType val = urand.number();
1156  C1[i] = FadType(ndot, val);
1157  C2[i] = FadType(ndot, val);
1158  C3[i] = FadType(ndot, val);
1159  for (unsigned int k=0; k<ndot; k++) {
1160  val = urand.number();
1161  C1[i].fastAccessDx(k) = val;
1162  C2[i].fastAccessDx(k) = val;
1163  C3[i].fastAccessDx(k) = val;
1164  }
1165  }
1166 
1167  Teuchos::BLAS<int,FadType> teuchos_blas;
1168  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1169  beta, &C1[0], incc);
1170 
1171  Teuchos::BLAS<int,FadType> sacado_blas(false);
1172  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1173  beta, &C2[0], incc);
1174 
1175  COMPARE_FAD_VECTORS(C1, C2, n*incc);
1176 
1177  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1178  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1179  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1180  beta, &C3[0], incc);
1181 
1182  COMPARE_FAD_VECTORS(C1, C3, n*incc);
1183 }
1184 
1185 // Tests with constant C
1186 template <class FadType, class ScalarType>
1187 void
1190  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1191  for (unsigned int j=0; j<n; j++) {
1192  for (unsigned int i=0; i<m; i++) {
1193  A[i+j*m] = FadType(ndot, urand.number());
1194  for (unsigned int k=0; k<ndot; k++)
1195  A[i+j*m].fastAccessDx(k) = urand.number();
1196  }
1197  B[j] = FadType(ndot, urand.number());
1198  for (unsigned int k=0; k<ndot; k++)
1199  B[j].fastAccessDx(k) = urand.number();
1200  }
1201  FadType alpha(ndot, urand.number());
1202  FadType beta(ndot, urand.number());
1203  for (unsigned int k=0; k<ndot; k++) {
1204  alpha.fastAccessDx(k) = urand.number();
1205  beta.fastAccessDx(k) = urand.number();
1206  }
1207 
1208  for (unsigned int i=0; i<m; i++) {
1209  ScalarType val = urand.number();
1210  C1[i] = val;
1211  C2[i] = val;
1212  C3[i] = val;
1213  }
1214 
1215  Teuchos::BLAS<int,FadType> teuchos_blas;
1216  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1217  beta, &C1[0], 1);
1218 
1219  Teuchos::BLAS<int,FadType> sacado_blas(false);
1220  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1221  beta, &C2[0], 1);
1222 
1223  COMPARE_FAD_VECTORS(C1, C2, m);
1224 
1225  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1226  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1227  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1228  beta, &C3[0], 1);
1229 
1230  COMPARE_FAD_VECTORS(C1, C3, m);
1231 }
1232 
1233 // Tests with constant alpha, beta
1234 template <class FadType, class ScalarType>
1235 void
1238  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1239  for (unsigned int j=0; j<n; j++) {
1240  for (unsigned int i=0; i<m; i++) {
1241  A[i+j*m] = FadType(ndot, urand.number());
1242  for (unsigned int k=0; k<ndot; k++)
1243  A[i+j*m].fastAccessDx(k) = urand.number();
1244  }
1245  B[j] = FadType(ndot, urand.number());
1246  for (unsigned int k=0; k<ndot; k++)
1247  B[j].fastAccessDx(k) = urand.number();
1248  }
1249  ScalarType alpha = urand.number();
1250  ScalarType beta = urand.number();
1251 
1252  for (unsigned int i=0; i<m; i++) {
1253  ScalarType val = urand.number();
1254  C1[i] = FadType(ndot, val);
1255  C2[i] = FadType(ndot, val);
1256  C3[i] = FadType(ndot, val);
1257  for (unsigned int k=0; k<ndot; k++) {
1258  val = urand.number();
1259  C1[i].fastAccessDx(k) = val;
1260  C2[i].fastAccessDx(k) = val;
1261  C3[i].fastAccessDx(k) = val;
1262  }
1263  }
1264 
1265  Teuchos::BLAS<int,FadType> teuchos_blas;
1266  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1267  beta, &C1[0], 1);
1268 
1269  Teuchos::BLAS<int,FadType> sacado_blas(false);
1270  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1271  beta, &C2[0], 1);
1272 
1273  COMPARE_FAD_VECTORS(C1, C2, m);
1274 
1275  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1276  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1277  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1278  beta, &C3[0], 1);
1279 
1280  COMPARE_FAD_VECTORS(C1, C3, m);
1281 }
1282 
1283 // Tests wth constant B
1284 template <class FadType, class ScalarType>
1285 void
1288  VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1289  C4(m,ndot);
1290  std::vector<ScalarType> b(n);
1291  for (unsigned int j=0; j<n; j++) {
1292  for (unsigned int i=0; i<m; i++) {
1293  A[i+j*m] = FadType(ndot, urand.number());
1294  for (unsigned int k=0; k<ndot; k++)
1295  A[i+j*m].fastAccessDx(k) = urand.number();
1296  }
1297  b[j] = urand.number();
1298  B[j] = b[j];
1299  }
1300  FadType alpha(ndot, urand.number());
1301  FadType beta(ndot, urand.number());
1302  for (unsigned int k=0; k<ndot; k++) {
1303  alpha.fastAccessDx(k) = urand.number();
1304  beta.fastAccessDx(k) = urand.number();
1305  }
1306 
1307  for (unsigned int i=0; i<m; i++) {
1308  ScalarType val = urand.number();
1309  C1[i] = FadType(ndot, val);
1310  C2[i] = FadType(ndot, val);
1311  C3[i] = FadType(ndot, val);
1312  C4[i] = FadType(ndot, val);
1313  for (unsigned int k=0; k<ndot; k++) {
1314  val = urand.number();
1315  C1[i].fastAccessDx(k) = val;
1316  C2[i].fastAccessDx(k) = val;
1317  C3[i].fastAccessDx(k) = val;
1318  C4[i].fastAccessDx(k) = val;
1319  }
1320  }
1321 
1322  Teuchos::BLAS<int,FadType> teuchos_blas;
1323  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1324  beta, &C1[0], 1);
1325 
1326  Teuchos::BLAS<int,FadType> sacado_blas(false);
1327  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1328  beta, &C2[0], 1);
1329 
1330  COMPARE_FAD_VECTORS(C1, C2, m);
1331 
1332  unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1333  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1334  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1335  beta, &C3[0], 1);
1336 
1337  COMPARE_FAD_VECTORS(C1, C3, m);
1338 
1339  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1,
1340  beta, &C4[0], 1);
1341 
1342  COMPARE_FAD_VECTORS(C1, C4, m);
1343 }
1344 
1345 // Tests with constant A
1346 template <class FadType, class ScalarType>
1347 void
1350  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1351  C4(m,ndot);
1352  std::vector<ScalarType> a(m*n);
1353  for (unsigned int j=0; j<n; j++) {
1354  for (unsigned int i=0; i<m; i++) {
1355  a[i+j*m] = urand.number();
1356  A[i+j*m] = a[i+j*m];
1357  }
1358  B[j] = FadType(ndot, urand.number());
1359  for (unsigned int k=0; k<ndot; k++)
1360  B[j].fastAccessDx(k) = urand.number();
1361  }
1362  FadType alpha(ndot, urand.number());
1363  FadType beta(ndot, urand.number());
1364  for (unsigned int k=0; k<ndot; k++) {
1365  alpha.fastAccessDx(k) = urand.number();
1366  beta.fastAccessDx(k) = urand.number();
1367  }
1368 
1369  for (unsigned int i=0; i<m; i++) {
1370  ScalarType val = urand.number();
1371  C1[i] = FadType(ndot, val);
1372  C2[i] = FadType(ndot, val);
1373  C3[i] = FadType(ndot, val);
1374  C4[i] = FadType(ndot, val);
1375  for (unsigned int k=0; k<ndot; k++) {
1376  val = urand.number();
1377  C1[i].fastAccessDx(k) = val;
1378  C2[i].fastAccessDx(k) = val;
1379  C3[i].fastAccessDx(k) = val;
1380  C4[i].fastAccessDx(k) = val;
1381  }
1382  }
1383 
1384  Teuchos::BLAS<int,FadType> teuchos_blas;
1385  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1386  beta, &C1[0], 1);
1387 
1388  Teuchos::BLAS<int,FadType> sacado_blas(false);
1389  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1390  beta, &C2[0], 1);
1391 
1392  COMPARE_FAD_VECTORS(C1, C2, m);
1393 
1394  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1395  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1396  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1397  beta, &C3[0], 1);
1398 
1399  COMPARE_FAD_VECTORS(C1, C3, m);
1400 
1401  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1,
1402  beta, &C4[0], 1);
1403 
1404  COMPARE_FAD_VECTORS(C1, C4, m);
1405 }
1406 
1407 // Tests with constant A, B
1408 template <class FadType, class ScalarType>
1409 void
1412  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1413  C4(m,ndot);
1414  std::vector<ScalarType> a(m*n), b(n);
1415  for (unsigned int j=0; j<n; j++) {
1416  for (unsigned int i=0; i<m; i++) {
1417  a[i+j*m] = urand.number();
1418  A[i+j*m] = a[i+j*m];
1419  }
1420  b[j] = urand.number();
1421  B[j] = b[j];
1422  }
1423  FadType alpha(ndot, urand.number());
1424  FadType beta(ndot, urand.number());
1425  for (unsigned int k=0; k<ndot; k++) {
1426  alpha.fastAccessDx(k) = urand.number();
1427  beta.fastAccessDx(k) = urand.number();
1428  }
1429 
1430  for (unsigned int i=0; i<m; i++) {
1431  ScalarType val = urand.number();
1432  C1[i] = FadType(ndot, val);
1433  C2[i] = FadType(ndot, val);
1434  C3[i] = FadType(ndot, val);
1435  C4[i] = FadType(ndot, val);
1436  for (unsigned int k=0; k<ndot; k++) {
1437  val = urand.number();
1438  C1[i].fastAccessDx(k) = val;
1439  C2[i].fastAccessDx(k) = val;
1440  C3[i].fastAccessDx(k) = val;
1441  C4[i].fastAccessDx(k) = val;
1442  }
1443  }
1444 
1445  Teuchos::BLAS<int,FadType> teuchos_blas;
1446  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1447  beta, &C1[0], 1);
1448 
1449  Teuchos::BLAS<int,FadType> sacado_blas(false);
1450  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1451  beta, &C2[0], 1);
1452 
1453  COMPARE_FAD_VECTORS(C1, C2, m);
1454 
1455  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1456  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1457  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1458  beta, &C3[0], 1);
1459 
1460  COMPARE_FAD_VECTORS(C1, C3, m);
1461 
1462  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1,
1463  beta, &C4[0], 1);
1464 
1465  COMPARE_FAD_VECTORS(C1, C4, m);
1466 }
1467 
1468 // Tests all arguments
1469 template <class FadType, class ScalarType>
1470 void
1473  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1474  for (unsigned int j=0; j<n; j++) {
1475  for (unsigned int i=0; i<n; i++) {
1476  A[i+j*n] = FadType(ndot, urand.number());
1477  for (unsigned int k=0; k<ndot; k++)
1478  A[i+j*n].fastAccessDx(k) = urand.number();
1479  }
1480  ScalarType val = urand.number();
1481  x1[j] = FadType(ndot, val);
1482  x2[j] = FadType(ndot, val);
1483  x3[j] = FadType(ndot, val);
1484  for (unsigned int k=0; k<ndot; k++) {
1485  val = urand.number();
1486  x1[j].fastAccessDx(k) = val;
1487  x2[j].fastAccessDx(k) = val;
1488  x3[j].fastAccessDx(k) = val;
1489  }
1490  }
1491 
1492  Teuchos::BLAS<int,FadType> teuchos_blas;
1493  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1494  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1495 
1496  Teuchos::BLAS<int,FadType> sacado_blas(false);
1498  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1499 
1500  COMPARE_FAD_VECTORS(x1, x2, n);
1501 
1502  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1503  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1504  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1505  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1506 
1507  COMPARE_FAD_VECTORS(x1, x3, n);
1508 
1509  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1510  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1512  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1513  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1514  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1515  COMPARE_FAD_VECTORS(x1, x2, n);
1516  COMPARE_FAD_VECTORS(x1, x3, n);
1517 
1518  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1519  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1520  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1521  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1522  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1523  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1524  COMPARE_FAD_VECTORS(x1, x2, n);
1525  COMPARE_FAD_VECTORS(x1, x3, n);
1526 
1527  for (unsigned int i=0; i<n; i++) {
1528  A[i*n+i].val() = 1.0;
1529  for (unsigned int k=0; k<ndot; k++)
1530  A[i*n+i].fastAccessDx(k) = 0.0;
1531  }
1532  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1533  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1535  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1536  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1537  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1538  COMPARE_FAD_VECTORS(x1, x2, n);
1539  COMPARE_FAD_VECTORS(x1, x3, n);
1540 }
1541 
1542 // Tests non unit inc, different lda
1543 template <class FadType, class ScalarType>
1544 void
1547  unsigned int lda = n+3;
1548  unsigned int incx = 2;
1549  VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1550  for (unsigned int j=0; j<n; j++) {
1551  for (unsigned int i=0; i<lda; i++) {
1552  A[i+j*lda] = FadType(ndot, urand.number());
1553  for (unsigned int k=0; k<ndot; k++)
1554  A[i+j*lda].fastAccessDx(k) = urand.number();
1555  }
1556  }
1557  for (unsigned int j=0; j<n*incx; j++) {
1558  ScalarType val = urand.number();
1559  x1[j] = FadType(ndot, val);
1560  x2[j] = FadType(ndot, val);
1561  x3[j] = FadType(ndot, val);
1562  for (unsigned int k=0; k<ndot; k++) {
1563  val = urand.number();
1564  x1[j].fastAccessDx(k) = val;
1565  x2[j].fastAccessDx(k) = val;
1566  x3[j].fastAccessDx(k) = val;
1567  }
1568  }
1569 
1570  Teuchos::BLAS<int,FadType> teuchos_blas;
1571  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1572  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1573 
1574  Teuchos::BLAS<int,FadType> sacado_blas(false);
1576  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1577 
1578  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1579 
1580  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1581  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1582  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1583  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1584 
1585  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1586 
1587  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1588  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1590  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1591  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1592  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1593  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1594  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1595 
1596  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1597  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1598  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1599  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1600  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1601  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1602  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1603  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1604 
1605  for (unsigned int i=0; i<n; i++) {
1606  A[i*lda+i].val() = 1.0;
1607  for (unsigned int k=0; k<ndot; k++)
1608  A[i*lda+i].fastAccessDx(k) = 0.0;
1609  }
1610  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1611  Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1613  Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1614  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1615  Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1616  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1617  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1618 }
1619 
1620 // Tests A constant
1621 template <class FadType, class ScalarType>
1622 void
1625  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1626  x5(n,ndot);
1627  std::vector<ScalarType> a(n*n);
1628  for (unsigned int j=0; j<n; j++) {
1629  for (unsigned int i=0; i<n; i++) {
1630  a[i+j*n] = urand.number();
1631  A[i+j*n] = a[i+j*n];
1632  }
1633  ScalarType val = urand.number();
1634  x1[j] = FadType(ndot, val);
1635  x2[j] = FadType(ndot, val);
1636  x3[j] = FadType(ndot, val);
1637  x4[j] = FadType(ndot, val);
1638  x5[j] = FadType(ndot, val);
1639  for (unsigned int k=0; k<ndot; k++) {
1640  val = urand.number();
1641  x1[j].fastAccessDx(k) = val;
1642  x2[j].fastAccessDx(k) = val;
1643  x3[j].fastAccessDx(k) = val;
1644  x4[j].fastAccessDx(k) = val;
1645  x5[j].fastAccessDx(k) = val;
1646  }
1647  }
1648 
1649  Teuchos::BLAS<int,FadType> teuchos_blas;
1650  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1651  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1652 
1653  Teuchos::BLAS<int,FadType> sacado_blas(false);
1655  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1656 
1657  COMPARE_FAD_VECTORS(x1, x2, n);
1658 
1659  unsigned int sz = n*n+n*(1+ndot);
1660  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1661  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1662  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1663 
1664  COMPARE_FAD_VECTORS(x1, x3, n);
1665 
1667  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1668 
1669  COMPARE_FAD_VECTORS(x1, x4, n);
1670 
1671  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1672  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
1673 
1674  COMPARE_FAD_VECTORS(x1, x5, n);
1675 
1676  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1677  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1679  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1680  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1681  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1683  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1684  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1685  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1686  COMPARE_FAD_VECTORS(x1, x2, n);
1687  COMPARE_FAD_VECTORS(x1, x3, n);
1688  COMPARE_FAD_VECTORS(x1, x4, n);
1689  COMPARE_FAD_VECTORS(x1, x5, n);
1690 
1691  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1692  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1693  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1694  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1695  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1696  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1697  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1698  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1699  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1700  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1701  COMPARE_FAD_VECTORS(x1, x2, n);
1702  COMPARE_FAD_VECTORS(x1, x3, n);
1703  COMPARE_FAD_VECTORS(x1, x4, n);
1704  COMPARE_FAD_VECTORS(x1, x5, n);
1705 
1706  for (unsigned int i=0; i<n; i++) {
1707  A[i*n+i].val() = 1.0;
1708  for (unsigned int k=0; k<ndot; k++)
1709  A[i*n+i].fastAccessDx(k) = 0.0;
1710  }
1711  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1712  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1714  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1715  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1716  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1718  Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1719  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1720  Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1721  COMPARE_FAD_VECTORS(x1, x2, n);
1722  COMPARE_FAD_VECTORS(x1, x3, n);
1723  COMPARE_FAD_VECTORS(x1, x4, n);
1724  COMPARE_FAD_VECTORS(x1, x5, n);
1725 }
1726 
1727 // Tests x constant
1728 template <class FadType, class ScalarType>
1729 void
1732  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1733  for (unsigned int j=0; j<n; j++) {
1734  for (unsigned int i=0; i<n; i++) {
1735  A[i+j*n] = FadType(ndot, urand.number());
1736  for (unsigned int k=0; k<ndot; k++)
1737  A[i+j*n].fastAccessDx(k) = urand.number();
1738  }
1739  ScalarType val = urand.number();
1740  x1[j] = val;
1741  x2[j] = val;
1742  x3[j] = val;
1743  }
1744 
1745  Teuchos::BLAS<int,FadType> teuchos_blas;
1746  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1747  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1748 
1749  Teuchos::BLAS<int,FadType> sacado_blas(false);
1751  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1752 
1753  COMPARE_FAD_VECTORS(x1, x2, n);
1754 
1755  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1756  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1757  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1758  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1759 
1760  COMPARE_FAD_VECTORS(x1, x3, n);
1761 
1762  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1763  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1765  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1766  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1767  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1768  COMPARE_FAD_VECTORS(x1, x2, n);
1769  COMPARE_FAD_VECTORS(x1, x3, n);
1770 
1771  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1772  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1773  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1774  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1775  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1776  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1777  COMPARE_FAD_VECTORS(x1, x2, n);
1778  COMPARE_FAD_VECTORS(x1, x3, n);
1779 
1780  for (unsigned int i=0; i<n; i++) {
1781  A[i*n+i].val() = 1.0;
1782  for (unsigned int k=0; k<ndot; k++)
1783  A[i*n+i].fastAccessDx(k) = 0.0;
1784  }
1785  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1786  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1788  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1789  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1790  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1791  COMPARE_FAD_VECTORS(x1, x2, n);
1792  COMPARE_FAD_VECTORS(x1, x3, n);
1793 }
1794 
1795 // Tests all arguments
1796 template <class FadType, class ScalarType>
1797 void
1800 
1801  // GER is apparently not defined in the BLAS for complex types
1803  return;
1804 
1805  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1806  for (unsigned int j=0; j<n; j++) {
1807  for (unsigned int i=0; i<m; i++) {
1808  ScalarType val = urand.number();
1809  A1[i+j*m] = FadType(ndot, val);
1810  A2[i+j*m] = FadType(ndot, val);
1811  A3[i+j*m] = FadType(ndot, val);
1812  for (unsigned int k=0; k<ndot; k++) {
1813  val = urand.number();
1814  A1[i+j*m].fastAccessDx(k) = val;
1815  A2[i+j*m].fastAccessDx(k) = val;
1816  A3[i+j*m].fastAccessDx(k) = val;
1817  }
1818  }
1819  }
1820  for (unsigned int i=0; i<m; i++) {
1821  x[i] = FadType(ndot, urand.number());
1822  for (unsigned int k=0; k<ndot; k++)
1823  x[i].fastAccessDx(k) = urand.number();
1824  }
1825  for (unsigned int i=0; i<n; i++) {
1826  y[i] = FadType(ndot, urand.number());
1827  for (unsigned int k=0; k<ndot; k++)
1828  y[i].fastAccessDx(k) = urand.number();
1829  }
1830  FadType alpha(ndot, urand.number());
1831  for (unsigned int k=0; k<ndot; k++) {
1832  alpha.fastAccessDx(k) = urand.number();
1833  }
1834 
1835  Teuchos::BLAS<int,FadType> teuchos_blas;
1836  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1837 
1838  Teuchos::BLAS<int,FadType> sacado_blas(false);
1839  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1840 
1841  COMPARE_FAD_VECTORS(A1, A2, m*n);
1842 
1843  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1844  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1845  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1846 
1847  COMPARE_FAD_VECTORS(A1, A3, m*n);
1848 }
1849 
1850 // Tests non unit inc, different lda
1851 template <class FadType, class ScalarType>
1852 void
1855 
1856  // GER is apparently not defined in the BLAS for complex types
1858  return;
1859 
1860  unsigned int lda = m+3;
1861  unsigned int incx = 2;
1862  unsigned int incy = 3;
1863  VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1864  y(n*incy,ndot);
1865  for (unsigned int j=0; j<n; j++) {
1866  for (unsigned int i=0; i<lda; i++) {
1867  ScalarType val = urand.number();
1868  A1[i+j*lda] = FadType(ndot, val);
1869  A2[i+j*lda] = FadType(ndot, val);
1870  A3[i+j*lda] = FadType(ndot, val);
1871  for (unsigned int k=0; k<ndot; k++) {
1872  val = urand.number();
1873  A1[i+j*lda].fastAccessDx(k) = val;
1874  A2[i+j*lda].fastAccessDx(k) = val;
1875  A3[i+j*lda].fastAccessDx(k) = val;
1876  }
1877  }
1878  }
1879  for (unsigned int i=0; i<m*incx; i++) {
1880  x[i] = FadType(ndot, urand.number());
1881  for (unsigned int k=0; k<ndot; k++)
1882  x[i].fastAccessDx(k) = urand.number();
1883  }
1884  for (unsigned int i=0; i<n*incy; i++) {
1885  y[i] = FadType(ndot, urand.number());
1886  for (unsigned int k=0; k<ndot; k++)
1887  y[i].fastAccessDx(k) = urand.number();
1888  }
1889  FadType alpha(ndot, urand.number());
1890  for (unsigned int k=0; k<ndot; k++) {
1891  alpha.fastAccessDx(k) = urand.number();
1892  }
1893 
1894  Teuchos::BLAS<int,FadType> teuchos_blas;
1895  teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1896 
1897  Teuchos::BLAS<int,FadType> sacado_blas(false);
1898  sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1899 
1900  COMPARE_FAD_VECTORS(A1, A2, lda*n);
1901 
1902  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1903  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1904  sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1905 
1906  COMPARE_FAD_VECTORS(A1, A3, lda*n);
1907 }
1908 
1909 // Tests constant alpha
1910 template <class FadType, class ScalarType>
1911 void
1914 
1915  // GER is apparently not defined in the BLAS for complex types
1917  return;
1918 
1919  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1920  for (unsigned int j=0; j<n; j++) {
1921  for (unsigned int i=0; i<m; i++) {
1922  ScalarType val = urand.number();
1923  A1[i+j*m] = FadType(ndot, val);
1924  A2[i+j*m] = FadType(ndot, val);
1925  A3[i+j*m] = FadType(ndot, val);
1926  for (unsigned int k=0; k<ndot; k++) {
1927  val = urand.number();
1928  A1[i+j*m].fastAccessDx(k) = val;
1929  A2[i+j*m].fastAccessDx(k) = val;
1930  A3[i+j*m].fastAccessDx(k) = val;
1931  }
1932  }
1933  }
1934  for (unsigned int i=0; i<m; i++) {
1935  x[i] = FadType(ndot, urand.number());
1936  for (unsigned int k=0; k<ndot; k++)
1937  x[i].fastAccessDx(k) = urand.number();
1938  }
1939  for (unsigned int i=0; i<n; i++) {
1940  y[i] = FadType(ndot, urand.number());
1941  for (unsigned int k=0; k<ndot; k++)
1942  y[i].fastAccessDx(k) = urand.number();
1943  }
1944  ScalarType alpha = urand.number();
1945 
1946  Teuchos::BLAS<int,FadType> teuchos_blas;
1947  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1948 
1949  Teuchos::BLAS<int,FadType> sacado_blas(false);
1950  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1951 
1952  COMPARE_FAD_VECTORS(A1, A2, m*n);
1953 
1954  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1955  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1956  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1957 
1958  COMPARE_FAD_VECTORS(A1, A3, m*n);
1959 }
1960 
1961 // Tests constant x
1962 template <class FadType, class ScalarType>
1963 void
1966 
1967  // GER is apparently not defined in the BLAS for complex types
1969  return;
1970 
1971  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1972  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1973  std::vector<ScalarType> xx(m);
1974  for (unsigned int j=0; j<n; j++) {
1975  for (unsigned int i=0; i<m; i++) {
1976  ScalarType val = urand.number();
1977  A1[i+j*m] = FadType(ndot, val);
1978  A2[i+j*m] = FadType(ndot, val);
1979  A3[i+j*m] = FadType(ndot, val);
1980  A4[i+j*m] = FadType(ndot, val);
1981  A5[i+j*m] = FadType(ndot, val);
1982  for (unsigned int k=0; k<ndot; k++) {
1983  val = urand.number();
1984  A1[i+j*m].fastAccessDx(k) = val;
1985  A2[i+j*m].fastAccessDx(k) = val;
1986  A3[i+j*m].fastAccessDx(k) = val;
1987  A4[i+j*m].fastAccessDx(k) = val;
1988  A5[i+j*m].fastAccessDx(k) = val;
1989  }
1990  }
1991  }
1992  for (unsigned int i=0; i<m; i++) {
1993  xx[i] = urand.number();
1994  x[i] = xx[i];
1995  }
1996  for (unsigned int i=0; i<n; i++) {
1997  y[i] = FadType(ndot, urand.number());
1998  for (unsigned int k=0; k<ndot; k++)
1999  y[i].fastAccessDx(k) = urand.number();
2000  }
2001  FadType alpha(ndot, urand.number());
2002  for (unsigned int k=0; k<ndot; k++) {
2003  alpha.fastAccessDx(k) = urand.number();
2004  }
2005 
2006  Teuchos::BLAS<int,FadType> teuchos_blas;
2007  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2008 
2009  Teuchos::BLAS<int,FadType> sacado_blas(false);
2010  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2011 
2012  COMPARE_FAD_VECTORS(A1, A2, m*n);
2013 
2014  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
2015  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2016  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2017 
2018  COMPARE_FAD_VECTORS(A1, A3, m*n);
2019 
2020  sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
2021 
2022  COMPARE_FAD_VECTORS(A1, A4, m*n);
2023 
2024  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
2025 
2026  COMPARE_FAD_VECTORS(A1, A5, m*n);
2027 }
2028 
2029 // Tests constant y
2030 template <class FadType, class ScalarType>
2031 void
2034 
2035  // GER is apparently not defined in the BLAS for complex types
2037  return;
2038 
2039  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2040  A5(m*n,ndot), x(m,ndot), y(n,ndot);
2041  std::vector<ScalarType> yy(n);
2042  for (unsigned int j=0; j<n; j++) {
2043  for (unsigned int i=0; i<m; i++) {
2044  ScalarType val = urand.number();
2045  A1[i+j*m] = FadType(ndot, val);
2046  A2[i+j*m] = FadType(ndot, val);
2047  A3[i+j*m] = FadType(ndot, val);
2048  A4[i+j*m] = FadType(ndot, val);
2049  A5[i+j*m] = FadType(ndot, val);
2050  for (unsigned int k=0; k<ndot; k++) {
2051  val = urand.number();
2052  A1[i+j*m].fastAccessDx(k) = val;
2053  A2[i+j*m].fastAccessDx(k) = val;
2054  A3[i+j*m].fastAccessDx(k) = val;
2055  A4[i+j*m].fastAccessDx(k) = val;
2056  A5[i+j*m].fastAccessDx(k) = val;
2057  }
2058  }
2059  }
2060  for (unsigned int i=0; i<m; i++) {
2061  x[i] = FadType(ndot, urand.number());
2062  for (unsigned int k=0; k<ndot; k++)
2063  x[i].fastAccessDx(k) = urand.number();
2064  }
2065  for (unsigned int i=0; i<n; i++) {
2066  yy[i] = urand.number();
2067  y[i] = yy[i];
2068  }
2069  FadType alpha(ndot, urand.number());
2070  for (unsigned int k=0; k<ndot; k++) {
2071  alpha.fastAccessDx(k) = urand.number();
2072  }
2073 
2074  Teuchos::BLAS<int,FadType> teuchos_blas;
2075  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2076 
2077  Teuchos::BLAS<int,FadType> sacado_blas(false);
2078  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2079 
2080  COMPARE_FAD_VECTORS(A1, A2, m*n);
2081 
2082  unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
2083  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2084  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2085 
2086  COMPARE_FAD_VECTORS(A1, A3, m*n);
2087 
2088  sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
2089 
2090  COMPARE_FAD_VECTORS(A1, A4, m*n);
2091 
2092  sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
2093 
2094  COMPARE_FAD_VECTORS(A1, A5, m*n);
2095 }
2096 
2097 // Tests constant x and y
2098 template <class FadType, class ScalarType>
2099 void
2102 
2103  // GER is apparently not defined in the BLAS for complex types
2105  return;
2106 
2107  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2108  A5(m*n,ndot), x(m,ndot), y(n,ndot);
2109  std::vector<ScalarType> xx(n), yy(n);
2110  for (unsigned int j=0; j<n; j++) {
2111  for (unsigned int i=0; i<m; i++) {
2112  ScalarType val = urand.number();
2113  A1[i+j*m] = FadType(ndot, val);
2114  A2[i+j*m] = FadType(ndot, val);
2115  A3[i+j*m] = FadType(ndot, val);
2116  A4[i+j*m] = FadType(ndot, val);
2117  A5[i+j*m] = FadType(ndot, val);
2118  for (unsigned int k=0; k<ndot; k++) {
2119  val = urand.number();
2120  A1[i+j*m].fastAccessDx(k) = val;
2121  A2[i+j*m].fastAccessDx(k) = val;
2122  A3[i+j*m].fastAccessDx(k) = val;
2123  A4[i+j*m].fastAccessDx(k) = val;
2124  A5[i+j*m].fastAccessDx(k) = val;
2125  }
2126  }
2127  }
2128  for (unsigned int i=0; i<m; i++) {
2129  xx[i] = urand.number();
2130  x[i] = xx[i];
2131  }
2132  for (unsigned int i=0; i<n; i++) {
2133  yy[i] = urand.number();
2134  y[i] = yy[i];
2135  }
2136  FadType alpha(ndot, urand.number());
2137  for (unsigned int k=0; k<ndot; k++) {
2138  alpha.fastAccessDx(k) = urand.number();
2139  }
2140 
2141  Teuchos::BLAS<int,FadType> teuchos_blas;
2142  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2143 
2144  Teuchos::BLAS<int,FadType> sacado_blas(false);
2145  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2146 
2147  COMPARE_FAD_VECTORS(A1, A2, m*n);
2148 
2149  unsigned int sz = m*n*(1+ndot) + m + n;
2150  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2151  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2152 
2153  COMPARE_FAD_VECTORS(A1, A3, m*n);
2154 
2155  sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2156 
2157  COMPARE_FAD_VECTORS(A1, A4, m*n);
2158 
2159  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2160 
2161  COMPARE_FAD_VECTORS(A1, A5, m*n);
2162 }
2163 
2164 // Tests constant A
2165 template <class FadType, class ScalarType>
2166 void
2169 
2170  // GER is apparently not defined in the BLAS for complex types
2172  return;
2173 
2174  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2175  for (unsigned int j=0; j<n; j++) {
2176  for (unsigned int i=0; i<m; i++) {
2177  ScalarType val = urand.number();
2178  A1[i+j*m] = val;
2179  A2[i+j*m] = val;
2180  A3[i+j*m] = val;
2181  }
2182  }
2183  for (unsigned int i=0; i<m; i++) {
2184  x[i] = FadType(ndot, urand.number());
2185  for (unsigned int k=0; k<ndot; k++)
2186  x[i].fastAccessDx(k) = urand.number();
2187  }
2188  for (unsigned int i=0; i<n; i++) {
2189  y[i] = FadType(ndot, urand.number());
2190  for (unsigned int k=0; k<ndot; k++)
2191  y[i].fastAccessDx(k) = urand.number();
2192  }
2193  FadType alpha(ndot, urand.number());
2194  for (unsigned int k=0; k<ndot; k++) {
2195  alpha.fastAccessDx(k) = urand.number();
2196  }
2197 
2198  Teuchos::BLAS<int,FadType> teuchos_blas;
2199  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2200 
2201  Teuchos::BLAS<int,FadType> sacado_blas(false);
2202  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2203 
2204  COMPARE_FAD_VECTORS(A1, A2, m*n);
2205 
2206  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2207  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2208  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2209 
2210  COMPARE_FAD_VECTORS(A1, A3, m*n);
2211 }
2212 
2213 // Tests all arguments
2214 template <class FadType, class ScalarType>
2215 void
2218  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2219  for (unsigned int j=0; j<l; j++) {
2220  for (unsigned int i=0; i<m; i++) {
2221  A[i+j*m] = FadType(ndot, urand.number());
2222  for (unsigned int k=0; k<ndot; k++)
2223  A[i+j*m].fastAccessDx(k) = urand.number();
2224  }
2225  }
2226  for (unsigned int j=0; j<n; j++) {
2227  for (unsigned int i=0; i<l; i++) {
2228  B[i+j*l] = FadType(ndot, urand.number());
2229  for (unsigned int k=0; k<ndot; k++)
2230  B[i+j*l].fastAccessDx(k) = urand.number();
2231  }
2232  }
2233  FadType alpha(ndot, urand.number());
2234  FadType beta(ndot, urand.number());
2235  for (unsigned int k=0; k<ndot; k++) {
2236  alpha.fastAccessDx(k) = urand.number();
2237  beta.fastAccessDx(k) = urand.number();
2238  }
2239 
2240  for (unsigned int j=0; j<n; j++) {
2241  for (unsigned int i=0; i<m; i++) {
2242  ScalarType val = urand.number();
2243  C1[i+j*m] = FadType(ndot, val);
2244  C2[i+j*m] = FadType(ndot, val);
2245  C3[i+j*m] = FadType(ndot, val);
2246  for (unsigned int k=0; k<ndot; k++) {
2247  val = urand.number();
2248  C1[i+j*m].fastAccessDx(k) = val;
2249  C2[i+j*m].fastAccessDx(k) = val;
2250  C3[i+j*m].fastAccessDx(k) = val;
2251  }
2252  }
2253  }
2254 
2255  Teuchos::BLAS<int,FadType> teuchos_blas;
2256  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2257  &A[0], m, &B[0], l, beta, &C1[0], m);
2258 
2259  Teuchos::BLAS<int,FadType> sacado_blas(false);
2260  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2261  &A[0], m, &B[0], l, beta, &C2[0], m);
2262 
2263  COMPARE_FAD_VECTORS(C1, C2, m*n);
2264 
2265  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2266  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2267  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2268  &A[0], m, &B[0], l, beta, &C3[0], m);
2269 
2270  COMPARE_FAD_VECTORS(C1, C3, m*n);
2271 
2272  // transa
2273  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2274  &A[0], l, &B[0], l, beta, &C1[0], m);
2275  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2276  &A[0], l, &B[0], l, beta, &C2[0], m);
2277  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278  &A[0], l, &B[0], l, beta, &C3[0], m);
2279 
2280  COMPARE_FAD_VECTORS(C1, C2, m*n);
2281  COMPARE_FAD_VECTORS(C1, C3, m*n);
2282 
2283  // transb
2284  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2285  &A[0], m, &B[0], n, beta, &C1[0], m);
2286  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2287  &A[0], m, &B[0], n, beta, &C2[0], m);
2288  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2289  &A[0], m, &B[0], n, beta, &C3[0], m);
2290 
2291  COMPARE_FAD_VECTORS(C1, C2, m*n);
2292  COMPARE_FAD_VECTORS(C1, C3, m*n);
2293 
2294  // transa and transb
2295  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2296  &A[0], l, &B[0], n, beta, &C1[0], m);
2297  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2298  &A[0], l, &B[0], n, beta, &C2[0], m);
2299  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2300  &A[0], l, &B[0], n, beta, &C3[0], m);
2301 
2302  COMPARE_FAD_VECTORS(C1, C2, m*n);
2303  COMPARE_FAD_VECTORS(C1, C3, m*n);
2304 }
2305 
2306 // Tests different lda, ldb, ldc
2307 template <class FadType, class ScalarType>
2308 void
2311  unsigned int lda = m+4;
2312  unsigned int ldb = l+4;
2313  unsigned int ldc = m+5;
2314  VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2315  C3(ldc*n,ndot);
2316  for (unsigned int j=0; j<l; j++) {
2317  for (unsigned int i=0; i<lda; i++) {
2318  A[i+j*lda] = FadType(ndot, urand.number());
2319  for (unsigned int k=0; k<ndot; k++)
2320  A[i+j*lda].fastAccessDx(k) = urand.number();
2321  }
2322  }
2323  for (unsigned int j=0; j<n; j++) {
2324  for (unsigned int i=0; i<ldb; i++) {
2325  B[i+j*ldb] = FadType(ndot, urand.number());
2326  for (unsigned int k=0; k<ndot; k++)
2327  B[i+j*ldb].fastAccessDx(k) = urand.number();
2328  }
2329  }
2330  FadType alpha(ndot, urand.number());
2331  FadType beta(ndot, urand.number());
2332  for (unsigned int k=0; k<ndot; k++) {
2333  alpha.fastAccessDx(k) = urand.number();
2334  beta.fastAccessDx(k) = urand.number();
2335  }
2336 
2337  for (unsigned int j=0; j<n; j++) {
2338  for (unsigned int i=0; i<ldc; i++) {
2339  ScalarType val = urand.number();
2340  C1[i+j*ldc] = FadType(ndot, val);
2341  C2[i+j*ldc] = FadType(ndot, val);
2342  C3[i+j*ldc] = FadType(ndot, val);
2343  for (unsigned int k=0; k<ndot; k++) {
2344  val = urand.number();
2345  C1[i+j*ldc].fastAccessDx(k) = val;
2346  C2[i+j*ldc].fastAccessDx(k) = val;
2347  C3[i+j*ldc].fastAccessDx(k) = val;
2348  }
2349  }
2350  }
2351 
2352  Teuchos::BLAS<int,FadType> teuchos_blas;
2353  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2354  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2355 
2356  Teuchos::BLAS<int,FadType> sacado_blas(false);
2357  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2359 
2360  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2361 
2362  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2363  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2364  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2365  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2366 
2367  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2368 }
2369 
2370 // Tests different lda, ldb, ldc with transa
2371 template <class FadType, class ScalarType>
2372 void
2375  unsigned int lda = l+3;
2376  unsigned int ldb = l+4;
2377  unsigned int ldc = m+5;
2378  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2379  C3(ldc*n,ndot);
2380  for (unsigned int j=0; j<m; j++) {
2381  for (unsigned int i=0; i<lda; i++) {
2382  A[i+j*lda] = FadType(ndot, urand.number());
2383  for (unsigned int k=0; k<ndot; k++)
2384  A[i+j*lda].fastAccessDx(k) = urand.number();
2385  }
2386  }
2387  for (unsigned int j=0; j<n; j++) {
2388  for (unsigned int i=0; i<ldb; i++) {
2389  B[i+j*ldb] = FadType(ndot, urand.number());
2390  for (unsigned int k=0; k<ndot; k++)
2391  B[i+j*ldb].fastAccessDx(k) = urand.number();
2392  }
2393  }
2394  FadType alpha(ndot, urand.number());
2395  FadType beta(ndot, urand.number());
2396  for (unsigned int k=0; k<ndot; k++) {
2397  alpha.fastAccessDx(k) = urand.number();
2398  beta.fastAccessDx(k) = urand.number();
2399  }
2400 
2401  for (unsigned int j=0; j<n; j++) {
2402  for (unsigned int i=0; i<ldc; i++) {
2403  ScalarType val = urand.number();
2404  C1[i+j*ldc] = FadType(ndot, val);
2405  C2[i+j*ldc] = FadType(ndot, val);
2406  C3[i+j*ldc] = FadType(ndot, val);
2407  for (unsigned int k=0; k<ndot; k++) {
2408  val = urand.number();
2409  C1[i+j*ldc].fastAccessDx(k) = val;
2410  C2[i+j*ldc].fastAccessDx(k) = val;
2411  C3[i+j*ldc].fastAccessDx(k) = val;
2412  }
2413  }
2414  }
2415 
2416  Teuchos::BLAS<int,FadType> teuchos_blas;
2417  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2418  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2419 
2420  Teuchos::BLAS<int,FadType> sacado_blas(false);
2421  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2422  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2423 
2424  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2425 
2426  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2427  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2428  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2429  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2430 
2431  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2432 }
2433 
2434 // Tests different lda, ldb, ldc with transb
2435 template <class FadType, class ScalarType>
2436 void
2439  unsigned int lda = m+4;
2440  unsigned int ldb = n+4;
2441  unsigned int ldc = m+5;
2442  VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2443  C3(ldc*n,ndot);
2444  for (unsigned int j=0; j<l; j++) {
2445  for (unsigned int i=0; i<lda; i++) {
2446  A[i+j*lda] = FadType(ndot, urand.number());
2447  for (unsigned int k=0; k<ndot; k++)
2448  A[i+j*lda].fastAccessDx(k) = urand.number();
2449  }
2450  }
2451  for (unsigned int j=0; j<l; j++) {
2452  for (unsigned int i=0; i<ldb; i++) {
2453  B[i+j*ldb] = FadType(ndot, urand.number());
2454  for (unsigned int k=0; k<ndot; k++)
2455  B[i+j*ldb].fastAccessDx(k) = urand.number();
2456  }
2457  }
2458  FadType alpha(ndot, urand.number());
2459  FadType beta(ndot, urand.number());
2460  for (unsigned int k=0; k<ndot; k++) {
2461  alpha.fastAccessDx(k) = urand.number();
2462  beta.fastAccessDx(k) = urand.number();
2463  }
2464 
2465  for (unsigned int j=0; j<n; j++) {
2466  for (unsigned int i=0; i<ldc; i++) {
2467  ScalarType val = urand.number();
2468  C1[i+j*ldc] = FadType(ndot, val);
2469  C2[i+j*ldc] = FadType(ndot, val);
2470  C3[i+j*ldc] = FadType(ndot, val);
2471  for (unsigned int k=0; k<ndot; k++) {
2472  val = urand.number();
2473  C1[i+j*ldc].fastAccessDx(k) = val;
2474  C2[i+j*ldc].fastAccessDx(k) = val;
2475  C3[i+j*ldc].fastAccessDx(k) = val;
2476  }
2477  }
2478  }
2479 
2480  Teuchos::BLAS<int,FadType> teuchos_blas;
2481  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2482  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2483 
2484  Teuchos::BLAS<int,FadType> sacado_blas(false);
2485  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2486  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2487 
2488  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2489 
2490  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2491  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2492  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2493  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2494 
2495  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2496 }
2497 
2498 // Tests different lda, ldb, ldc with transa and transb
2499 template <class FadType, class ScalarType>
2500 void
2503  unsigned int lda = l+3;
2504  unsigned int ldb = n+4;
2505  unsigned int ldc = m+5;
2506  VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2507  C3(ldc*n,ndot);
2508  for (unsigned int j=0; j<m; j++) {
2509  for (unsigned int i=0; i<lda; i++) {
2510  A[i+j*lda] = FadType(ndot, urand.number());
2511  for (unsigned int k=0; k<ndot; k++)
2512  A[i+j*lda].fastAccessDx(k) = urand.number();
2513  }
2514  }
2515  for (unsigned int j=0; j<l; j++) {
2516  for (unsigned int i=0; i<ldb; i++) {
2517  B[i+j*ldb] = FadType(ndot, urand.number());
2518  for (unsigned int k=0; k<ndot; k++)
2519  B[i+j*ldb].fastAccessDx(k) = urand.number();
2520  }
2521  }
2522  FadType alpha(ndot, urand.number());
2523  FadType beta(ndot, urand.number());
2524  for (unsigned int k=0; k<ndot; k++) {
2525  alpha.fastAccessDx(k) = urand.number();
2526  beta.fastAccessDx(k) = urand.number();
2527  }
2528 
2529  for (unsigned int j=0; j<n; j++) {
2530  for (unsigned int i=0; i<ldc; i++) {
2531  ScalarType val = urand.number();
2532  C1[i+j*ldc] = FadType(ndot, val);
2533  C2[i+j*ldc] = FadType(ndot, val);
2534  C3[i+j*ldc] = FadType(ndot, val);
2535  for (unsigned int k=0; k<ndot; k++) {
2536  val = urand.number();
2537  C1[i+j*ldc].fastAccessDx(k) = val;
2538  C2[i+j*ldc].fastAccessDx(k) = val;
2539  C3[i+j*ldc].fastAccessDx(k) = val;
2540  }
2541  }
2542  }
2543 
2544  Teuchos::BLAS<int,FadType> teuchos_blas;
2545  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2546  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2547 
2548  Teuchos::BLAS<int,FadType> sacado_blas(false);
2549  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2550  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2551 
2552  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2553 
2554  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2555  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2556  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2557  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2558 
2559  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2560 }
2561 
2562 // Tests with constant C
2563 template <class FadType, class ScalarType>
2564 void
2567  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2568  for (unsigned int j=0; j<l; j++) {
2569  for (unsigned int i=0; i<m; i++) {
2570  A[i+j*m] = FadType(ndot, urand.number());
2571  for (unsigned int k=0; k<ndot; k++)
2572  A[i+j*m].fastAccessDx(k) = urand.number();
2573  }
2574  }
2575  for (unsigned int j=0; j<n; j++) {
2576  for (unsigned int i=0; i<l; i++) {
2577  B[i+j*l] = FadType(ndot, urand.number());
2578  for (unsigned int k=0; k<ndot; k++)
2579  B[i+j*l].fastAccessDx(k) = urand.number();
2580  }
2581  }
2582  FadType alpha(ndot, urand.number());
2583  FadType beta(ndot, urand.number());
2584  for (unsigned int k=0; k<ndot; k++) {
2585  alpha.fastAccessDx(k) = urand.number();
2586  beta.fastAccessDx(k) = urand.number();
2587  }
2588 
2589  for (unsigned int j=0; j<n; j++) {
2590  for (unsigned int i=0; i<m; i++) {
2591  ScalarType val = urand.number();
2592  C1[i+j*m] = val;
2593  C2[i+j*m] = val;
2594  C3[i+j*m] = val;
2595  }
2596  }
2597 
2598  Teuchos::BLAS<int,FadType> teuchos_blas;
2599  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2600  &A[0], m, &B[0], l, beta, &C1[0], m);
2601 
2602  Teuchos::BLAS<int,FadType> sacado_blas(false);
2603  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2604  &A[0], m, &B[0], l, beta, &C2[0], m);
2605 
2606  COMPARE_FAD_VECTORS(C1, C2, m*n);
2607 
2608  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2609  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2610  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2611  &A[0], m, &B[0], l, beta, &C3[0], m);
2612 
2613  COMPARE_FAD_VECTORS(C1, C3, m*n);
2614 
2615  // transa
2616  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2617  &A[0], l, &B[0], l, beta, &C1[0], m);
2618  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2619  &A[0], l, &B[0], l, beta, &C2[0], m);
2620  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2621  &A[0], l, &B[0], l, beta, &C3[0], m);
2622 
2623  COMPARE_FAD_VECTORS(C1, C2, m*n);
2624  COMPARE_FAD_VECTORS(C1, C3, m*n);
2625 
2626  // transb
2627  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2628  &A[0], m, &B[0], n, beta, &C1[0], m);
2629  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2630  &A[0], m, &B[0], n, beta, &C2[0], m);
2631  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2632  &A[0], m, &B[0], n, beta, &C3[0], m);
2633 
2634  COMPARE_FAD_VECTORS(C1, C2, m*n);
2635  COMPARE_FAD_VECTORS(C1, C3, m*n);
2636 
2637  // transa and transb
2638  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2639  &A[0], l, &B[0], n, beta, &C1[0], m);
2640  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2641  &A[0], l, &B[0], n, beta, &C2[0], m);
2642  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2643  &A[0], l, &B[0], n, beta, &C3[0], m);
2644 
2645  COMPARE_FAD_VECTORS(C1, C2, m*n);
2646  COMPARE_FAD_VECTORS(C1, C3, m*n);
2647 }
2648 
2649 // Tests with constant alpha, beta
2650 template <class FadType, class ScalarType>
2651 void
2654  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2655  for (unsigned int j=0; j<l; j++) {
2656  for (unsigned int i=0; i<m; i++) {
2657  A[i+j*m] = FadType(ndot, urand.number());
2658  for (unsigned int k=0; k<ndot; k++)
2659  A[i+j*m].fastAccessDx(k) = urand.number();
2660  }
2661  }
2662  for (unsigned int j=0; j<n; j++) {
2663  for (unsigned int i=0; i<l; i++) {
2664  B[i+j*l] = FadType(ndot, urand.number());
2665  for (unsigned int k=0; k<ndot; k++)
2666  B[i+j*l].fastAccessDx(k) = urand.number();
2667  }
2668  }
2669  ScalarType alpha = urand.number();
2670  ScalarType beta = urand.number();
2671 
2672  for (unsigned int j=0; j<n; j++) {
2673  for (unsigned int i=0; i<m; i++) {
2674  ScalarType val = urand.number();
2675  C1[i+j*m] = FadType(ndot, val);
2676  C2[i+j*m] = FadType(ndot, val);
2677  C3[i+j*m] = FadType(ndot, val);
2678  for (unsigned int k=0; k<ndot; k++) {
2679  val = urand.number();
2680  C1[i+j*m].fastAccessDx(k) = val;
2681  C2[i+j*m].fastAccessDx(k) = val;
2682  C3[i+j*m].fastAccessDx(k) = val;
2683  }
2684  }
2685  }
2686 
2687  Teuchos::BLAS<int,FadType> teuchos_blas;
2688  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2689  &A[0], m, &B[0], l, beta, &C1[0], m);
2690 
2691  Teuchos::BLAS<int,FadType> sacado_blas(false);
2692  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2693  &A[0], m, &B[0], l, beta, &C2[0], m);
2694 
2695  COMPARE_FAD_VECTORS(C1, C2, m*n);
2696 
2697  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2698  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2699  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2700  &A[0], m, &B[0], l, beta, &C3[0], m);
2701 
2702  COMPARE_FAD_VECTORS(C1, C3, m*n);
2703 
2704  // transa
2705  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2706  &A[0], l, &B[0], l, beta, &C1[0], m);
2707  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2708  &A[0], l, &B[0], l, beta, &C2[0], m);
2709  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2710  &A[0], l, &B[0], l, beta, &C3[0], m);
2711 
2712  COMPARE_FAD_VECTORS(C1, C2, m*n);
2713  COMPARE_FAD_VECTORS(C1, C3, m*n);
2714 
2715  // transb
2716  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2717  &A[0], m, &B[0], n, beta, &C1[0], m);
2718  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2719  &A[0], m, &B[0], n, beta, &C2[0], m);
2720  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2721  &A[0], m, &B[0], n, beta, &C3[0], m);
2722 
2723  COMPARE_FAD_VECTORS(C1, C2, m*n);
2724  COMPARE_FAD_VECTORS(C1, C3, m*n);
2725 
2726  // transa and transb
2727  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2728  &A[0], l, &B[0], n, beta, &C1[0], m);
2729  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2730  &A[0], l, &B[0], n, beta, &C2[0], m);
2731  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2732  &A[0], l, &B[0], n, beta, &C3[0], m);
2733 
2734  COMPARE_FAD_VECTORS(C1, C2, m*n);
2735  COMPARE_FAD_VECTORS(C1, C3, m*n);
2736 }
2737 
2738 // Tests with constant A
2739 template <class FadType, class ScalarType>
2740 void
2743  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2744  C4(m*n,ndot), C5(m*n,ndot);
2745  std::vector<ScalarType> a(m*l);
2746  for (unsigned int j=0; j<l; j++) {
2747  for (unsigned int i=0; i<m; i++) {
2748  a[i+j*m] = urand.number();
2749  A[i+j*m] = a[i+j*m];
2750  }
2751  }
2752  for (unsigned int j=0; j<n; j++) {
2753  for (unsigned int i=0; i<l; i++) {
2754  B[i+j*l] = FadType(ndot, urand.number());
2755  for (unsigned int k=0; k<ndot; k++)
2756  B[i+j*l].fastAccessDx(k) = urand.number();
2757  }
2758  }
2759  FadType alpha(ndot, urand.number());
2760  FadType beta(ndot, urand.number());
2761  for (unsigned int k=0; k<ndot; k++) {
2762  alpha.fastAccessDx(k) = urand.number();
2763  beta.fastAccessDx(k) = urand.number();
2764  }
2765 
2766  for (unsigned int j=0; j<n; j++) {
2767  for (unsigned int i=0; i<m; i++) {
2768  ScalarType val = urand.number();
2769  C1[i+j*m] = FadType(ndot, val);
2770  C2[i+j*m] = FadType(ndot, val);
2771  C3[i+j*m] = FadType(ndot, val);
2772  C4[i+j*m] = FadType(ndot, val);
2773  C5[i+j*m] = FadType(ndot, val);
2774  for (unsigned int k=0; k<ndot; k++) {
2775  val = urand.number();
2776  C1[i+j*m].fastAccessDx(k) = val;
2777  C2[i+j*m].fastAccessDx(k) = val;
2778  C3[i+j*m].fastAccessDx(k) = val;
2779  C4[i+j*m].fastAccessDx(k) = val;
2780  C5[i+j*m].fastAccessDx(k) = val;
2781  }
2782  }
2783  }
2784 
2785  Teuchos::BLAS<int,FadType> teuchos_blas;
2786  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2787  &A[0], m, &B[0], l, beta, &C1[0], m);
2788 
2789  Teuchos::BLAS<int,FadType> sacado_blas(false);
2790  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2791  &A[0], m, &B[0], l, beta, &C2[0], m);
2792 
2793  COMPARE_FAD_VECTORS(C1, C2, m*n);
2794 
2795  unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2796  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2797  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2798  &A[0], m, &B[0], l, beta, &C3[0], m);
2799 
2800  COMPARE_FAD_VECTORS(C1, C3, m*n);
2801 
2802  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2803  &a[0], m, &B[0], l, beta, &C4[0], m);
2804 
2805  COMPARE_FAD_VECTORS(C1, C4, m*n);
2806 
2807  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2808  &a[0], m, &B[0], l, beta, &C5[0], m);
2809 
2810  COMPARE_FAD_VECTORS(C1, C5, m*n);
2811 
2812  // transa
2813  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2814  &A[0], l, &B[0], l, beta, &C1[0], m);
2815  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2816  &A[0], l, &B[0], l, beta, &C2[0], m);
2817  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2818  &A[0], l, &B[0], l, beta, &C3[0], m);
2819  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2820  &a[0], l, &B[0], l, beta, &C4[0], m);
2821  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2822  &a[0], l, &B[0], l, beta, &C5[0], m);
2823 
2824  COMPARE_FAD_VECTORS(C1, C2, m*n);
2825  COMPARE_FAD_VECTORS(C1, C3, m*n);
2826  COMPARE_FAD_VECTORS(C1, C4, m*n);
2827  COMPARE_FAD_VECTORS(C1, C5, m*n);
2828 
2829  // transb
2830  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2831  &A[0], m, &B[0], n, beta, &C1[0], m);
2832  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2833  &A[0], m, &B[0], n, beta, &C2[0], m);
2834  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2835  &A[0], m, &B[0], n, beta, &C3[0], m);
2836  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2837  &a[0], m, &B[0], n, beta, &C4[0], m);
2838  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2839  &a[0], m, &B[0], n, beta, &C5[0], m);
2840 
2841  COMPARE_FAD_VECTORS(C1, C2, m*n);
2842  COMPARE_FAD_VECTORS(C1, C3, m*n);
2843  COMPARE_FAD_VECTORS(C1, C4, m*n);
2844  COMPARE_FAD_VECTORS(C1, C5, m*n);
2845 
2846  // transa and transb
2847  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2848  &A[0], l, &B[0], n, beta, &C1[0], m);
2849  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2850  &A[0], l, &B[0], n, beta, &C2[0], m);
2851  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2852  &A[0], l, &B[0], n, beta, &C3[0], m);
2853  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2854  &a[0], l, &B[0], n, beta, &C4[0], m);
2855  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2856  &a[0], l, &B[0], n, beta, &C5[0], m);
2857 
2858  COMPARE_FAD_VECTORS(C1, C2, m*n);
2859  COMPARE_FAD_VECTORS(C1, C3, m*n);
2860  COMPARE_FAD_VECTORS(C1, C4, m*n);
2861  COMPARE_FAD_VECTORS(C1, C5, m*n);
2862 }
2863 
2864 // Tests with constant B
2865 template <class FadType, class ScalarType>
2866 void
2869  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2870  C4(m*n,ndot), C5(m*n,ndot);
2871  std::vector<ScalarType> b(l*n);
2872  for (unsigned int j=0; j<l; j++) {
2873  for (unsigned int i=0; i<m; i++) {
2874  A[i+j*m] = FadType(ndot, urand.number());
2875  for (unsigned int k=0; k<ndot; k++)
2876  A[i+j*m].fastAccessDx(k) = urand.number();
2877  }
2878  }
2879  for (unsigned int j=0; j<n; j++) {
2880  for (unsigned int i=0; i<l; i++) {
2881  b[i+j*l] = urand.number();
2882  B[i+j*l] = b[i+j*l];
2883  }
2884  }
2885  FadType alpha(ndot, urand.number());
2886  FadType beta(ndot, urand.number());
2887  for (unsigned int k=0; k<ndot; k++) {
2888  alpha.fastAccessDx(k) = urand.number();
2889  beta.fastAccessDx(k) = urand.number();
2890  }
2891 
2892  for (unsigned int j=0; j<n; j++) {
2893  for (unsigned int i=0; i<m; i++) {
2894  ScalarType val = urand.number();
2895  C1[i+j*m] = FadType(ndot, val);
2896  C2[i+j*m] = FadType(ndot, val);
2897  C3[i+j*m] = FadType(ndot, val);
2898  C4[i+j*m] = FadType(ndot, val);
2899  C5[i+j*m] = FadType(ndot, val);
2900  for (unsigned int k=0; k<ndot; k++) {
2901  val = urand.number();
2902  C1[i+j*m].fastAccessDx(k) = val;
2903  C2[i+j*m].fastAccessDx(k) = val;
2904  C3[i+j*m].fastAccessDx(k) = val;
2905  C4[i+j*m].fastAccessDx(k) = val;
2906  C5[i+j*m].fastAccessDx(k) = val;
2907  }
2908  }
2909  }
2910 
2911  Teuchos::BLAS<int,FadType> teuchos_blas;
2912  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2913  &A[0], m, &B[0], l, beta, &C1[0], m);
2914 
2915  Teuchos::BLAS<int,FadType> sacado_blas(false);
2916  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2917  &A[0], m, &B[0], l, beta, &C2[0], m);
2918 
2919  COMPARE_FAD_VECTORS(C1, C2, m*n);
2920 
2921  unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2922  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2923  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2924  &A[0], m, &B[0], l, beta, &C3[0], m);
2925 
2926  COMPARE_FAD_VECTORS(C1, C3, m*n);
2927 
2928  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2929  &A[0], m, &b[0], l, beta, &C4[0], m);
2930 
2931  COMPARE_FAD_VECTORS(C1, C4, m*n);
2932 
2933  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2934  &A[0], m, &b[0], l, beta, &C5[0], m);
2935 
2936  COMPARE_FAD_VECTORS(C1, C5, m*n);
2937 
2938  // transa
2939  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2940  &A[0], l, &B[0], l, beta, &C1[0], m);
2941  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2942  &A[0], l, &B[0], l, beta, &C2[0], m);
2943  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2944  &A[0], l, &B[0], l, beta, &C3[0], m);
2945  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2946  &A[0], l, &b[0], l, beta, &C4[0], m);
2947  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2948  &A[0], l, &b[0], l, beta, &C5[0], m);
2949 
2950  COMPARE_FAD_VECTORS(C1, C2, m*n);
2951  COMPARE_FAD_VECTORS(C1, C3, m*n);
2952  COMPARE_FAD_VECTORS(C1, C4, m*n);
2953  COMPARE_FAD_VECTORS(C1, C5, m*n);
2954 
2955  // transb
2956  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2957  &A[0], m, &B[0], n, beta, &C1[0], m);
2958  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2959  &A[0], m, &B[0], n, beta, &C2[0], m);
2960  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2961  &A[0], m, &B[0], n, beta, &C3[0], m);
2962  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2963  &A[0], m, &b[0], n, beta, &C4[0], m);
2964  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2965  &A[0], m, &b[0], n, beta, &C5[0], m);
2966 
2967  COMPARE_FAD_VECTORS(C1, C2, m*n);
2968  COMPARE_FAD_VECTORS(C1, C3, m*n);
2969  COMPARE_FAD_VECTORS(C1, C4, m*n);
2970  COMPARE_FAD_VECTORS(C1, C5, m*n);
2971 
2972  // transa and transb
2973  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2974  &A[0], l, &B[0], n, beta, &C1[0], m);
2975  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2976  &A[0], l, &B[0], n, beta, &C2[0], m);
2977  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2978  &A[0], l, &B[0], n, beta, &C3[0], m);
2979  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2980  &A[0], l, &b[0], n, beta, &C4[0], m);
2981  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2982  &A[0], l, &b[0], n, beta, &C5[0], m);
2983 
2984  COMPARE_FAD_VECTORS(C1, C2, m*n);
2985  COMPARE_FAD_VECTORS(C1, C3, m*n);
2986  COMPARE_FAD_VECTORS(C1, C4, m*n);
2987  COMPARE_FAD_VECTORS(C1, C5, m*n);
2988 }
2989 
2990 // Tests with constant A and B
2991 template <class FadType, class ScalarType>
2992 void
2995  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2996  C4(m*n,ndot), C5(m*n,ndot);
2997  std::vector<ScalarType> a(m*l), b(l*n);
2998  for (unsigned int j=0; j<l; j++) {
2999  for (unsigned int i=0; i<m; i++) {
3000  a[i+j*m] = urand.number();
3001  A[i+j*m] = a[i+j*m];
3002  }
3003  }
3004  for (unsigned int j=0; j<n; j++) {
3005  for (unsigned int i=0; i<l; i++) {
3006  b[i+j*l] = urand.number();
3007  B[i+j*l] = b[i+j*l];
3008  }
3009  }
3010  FadType alpha(ndot, urand.number());
3011  FadType beta(ndot, urand.number());
3012  for (unsigned int k=0; k<ndot; k++) {
3013  alpha.fastAccessDx(k) = urand.number();
3014  beta.fastAccessDx(k) = urand.number();
3015  }
3016 
3017  for (unsigned int j=0; j<n; j++) {
3018  for (unsigned int i=0; i<m; i++) {
3019  ScalarType val = urand.number();
3020  C1[i+j*m] = FadType(ndot, val);
3021  C2[i+j*m] = FadType(ndot, val);
3022  C3[i+j*m] = FadType(ndot, val);
3023  C4[i+j*m] = FadType(ndot, val);
3024  C5[i+j*m] = FadType(ndot, val);
3025  for (unsigned int k=0; k<ndot; k++) {
3026  val = urand.number();
3027  C1[i+j*m].fastAccessDx(k) = val;
3028  C2[i+j*m].fastAccessDx(k) = val;
3029  C3[i+j*m].fastAccessDx(k) = val;
3030  C4[i+j*m].fastAccessDx(k) = val;
3031  C5[i+j*m].fastAccessDx(k) = val;
3032  }
3033  }
3034  }
3035 
3036  Teuchos::BLAS<int,FadType> teuchos_blas;
3037  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3038  &A[0], m, &B[0], l, beta, &C1[0], m);
3039 
3040  Teuchos::BLAS<int,FadType> sacado_blas(false);
3041  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3042  &A[0], m, &B[0], l, beta, &C2[0], m);
3043 
3044  COMPARE_FAD_VECTORS(C1, C2, m*n);
3045 
3046  unsigned int sz = m*l + l*n + m*n*(1+ndot);
3047  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3048  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3049  &A[0], m, &B[0], l, beta, &C3[0], m);
3050 
3051  COMPARE_FAD_VECTORS(C1, C3, m*n);
3052 
3053  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3054  &a[0], m, &b[0], l, beta, &C4[0], m);
3055 
3056  COMPARE_FAD_VECTORS(C1, C4, m*n);
3057 
3058  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3059  &a[0], m, &b[0], l, beta, &C5[0], m);
3060 
3061  COMPARE_FAD_VECTORS(C1, C5, m*n);
3062 
3063  // transa
3064  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3065  &A[0], l, &B[0], l, beta, &C1[0], m);
3066  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3067  &A[0], l, &B[0], l, beta, &C2[0], m);
3068  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3069  &A[0], l, &B[0], l, beta, &C3[0], m);
3070  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3071  &a[0], l, &b[0], l, beta, &C4[0], m);
3072  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3073  &a[0], l, &b[0], l, beta, &C5[0], m);
3074 
3075  COMPARE_FAD_VECTORS(C1, C2, m*n);
3076  COMPARE_FAD_VECTORS(C1, C3, m*n);
3077  COMPARE_FAD_VECTORS(C1, C4, m*n);
3078  COMPARE_FAD_VECTORS(C1, C5, m*n);
3079 
3080  // transb
3081  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3082  &A[0], m, &B[0], n, beta, &C1[0], m);
3083  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3084  &A[0], m, &B[0], n, beta, &C2[0], m);
3085  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3086  &A[0], m, &B[0], n, beta, &C3[0], m);
3087  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3088  &a[0], m, &b[0], n, beta, &C4[0], m);
3089  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3090  &a[0], m, &b[0], n, beta, &C5[0], m);
3091 
3092  COMPARE_FAD_VECTORS(C1, C2, m*n);
3093  COMPARE_FAD_VECTORS(C1, C3, m*n);
3094  COMPARE_FAD_VECTORS(C1, C4, m*n);
3095  COMPARE_FAD_VECTORS(C1, C5, m*n);
3096 
3097  // transa and transb
3098  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3099  &A[0], l, &B[0], n, beta, &C1[0], m);
3100  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3101  &A[0], l, &B[0], n, beta, &C2[0], m);
3102  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3103  &A[0], l, &B[0], n, beta, &C3[0], m);
3104  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3105  &a[0], l, &b[0], n, beta, &C4[0], m);
3106  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3107  &a[0], l, &b[0], n, beta, &C5[0], m);
3108 
3109  COMPARE_FAD_VECTORS(C1, C2, m*n);
3110  COMPARE_FAD_VECTORS(C1, C3, m*n);
3111  COMPARE_FAD_VECTORS(C1, C4, m*n);
3112  COMPARE_FAD_VECTORS(C1, C5, m*n);
3113 }
3114 
3115 // Tests all arguments, left side
3116 template <class FadType, class ScalarType>
3117 void
3120 
3121  // SYMM is apparently not defined in the BLAS for complex types
3123  return;
3124 
3125  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3126  for (unsigned int j=0; j<m; j++) {
3127  for (unsigned int i=0; i<m; i++) {
3128  A[i+j*m] = FadType(ndot, urand.number());
3129  for (unsigned int k=0; k<ndot; k++)
3130  A[i+j*m].fastAccessDx(k) = urand.number();
3131  }
3132  }
3133  for (unsigned int j=0; j<n; j++) {
3134  for (unsigned int i=0; i<m; i++) {
3135  B[i+j*m] = FadType(ndot, urand.number());
3136  for (unsigned int k=0; k<ndot; k++)
3137  B[i+j*m].fastAccessDx(k) = urand.number();
3138  }
3139  }
3140  FadType alpha(ndot, urand.number());
3141  FadType beta(ndot, urand.number());
3142  for (unsigned int k=0; k<ndot; k++) {
3143  alpha.fastAccessDx(k) = urand.number();
3144  beta.fastAccessDx(k) = urand.number();
3145  }
3146 
3147  for (unsigned int j=0; j<n; j++) {
3148  for (unsigned int i=0; i<m; i++) {
3149  ScalarType val = urand.number();
3150  C1[i+j*m] = FadType(ndot, val);
3151  C2[i+j*m] = FadType(ndot, val);
3152  C3[i+j*m] = FadType(ndot, val);
3153  for (unsigned int k=0; k<ndot; k++) {
3154  val = urand.number();
3155  C1[i+j*m].fastAccessDx(k) = val;
3156  C2[i+j*m].fastAccessDx(k) = val;
3157  C3[i+j*m].fastAccessDx(k) = val;
3158  }
3159  }
3160  }
3161 
3162  Teuchos::BLAS<int,FadType> teuchos_blas;
3163  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3164  &A[0], m, &B[0], m, beta, &C1[0], m);
3165 
3166  Teuchos::BLAS<int,FadType> sacado_blas(false);
3167  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3168  &A[0], m, &B[0], m, beta, &C2[0], m);
3169 
3170  COMPARE_FAD_VECTORS(C1, C2, m*n);
3171 
3172  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3173  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3174  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3175  &A[0], m, &B[0], m, beta, &C3[0], m);
3176 
3177  COMPARE_FAD_VECTORS(C1, C3, m*n);
3178 
3179  // lower tri
3180  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3181  &A[0], m, &B[0], m, beta, &C1[0], m);
3182  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3183  &A[0], m, &B[0], m, beta, &C2[0], m);
3184  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3185  &A[0], m, &B[0], m, beta, &C3[0], m);
3186 
3187  COMPARE_FAD_VECTORS(C1, C2, m*n);
3188  COMPARE_FAD_VECTORS(C1, C3, m*n);
3189 }
3190 
3191 // Tests all arguments, right side
3192 template <class FadType, class ScalarType>
3193 void
3196 
3197  // SYMM is apparently not defined in the BLAS for complex types
3199  return;
3200 
3201  VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3202  for (unsigned int j=0; j<n; j++) {
3203  for (unsigned int i=0; i<n; i++) {
3204  A[i+j*n] = FadType(ndot, urand.number());
3205  for (unsigned int k=0; k<ndot; k++)
3206  A[i+j*n].fastAccessDx(k) = urand.number();
3207  }
3208  }
3209  for (unsigned int j=0; j<n; j++) {
3210  for (unsigned int i=0; i<m; i++) {
3211  B[i+j*m] = FadType(ndot, urand.number());
3212  for (unsigned int k=0; k<ndot; k++)
3213  B[i+j*m].fastAccessDx(k) = urand.number();
3214  }
3215  }
3216  FadType alpha(ndot, urand.number());
3217  FadType beta(ndot, urand.number());
3218  for (unsigned int k=0; k<ndot; k++) {
3219  alpha.fastAccessDx(k) = urand.number();
3220  beta.fastAccessDx(k) = urand.number();
3221  }
3222 
3223  for (unsigned int j=0; j<n; j++) {
3224  for (unsigned int i=0; i<m; i++) {
3225  ScalarType val = urand.number();
3226  C1[i+j*m] = FadType(ndot, val);
3227  C2[i+j*m] = FadType(ndot, val);
3228  C3[i+j*m] = FadType(ndot, val);
3229  for (unsigned int k=0; k<ndot; k++) {
3230  val = urand.number();
3231  C1[i+j*m].fastAccessDx(k) = val;
3232  C2[i+j*m].fastAccessDx(k) = val;
3233  C3[i+j*m].fastAccessDx(k) = val;
3234  }
3235  }
3236  }
3237 
3238  Teuchos::BLAS<int,FadType> teuchos_blas;
3239  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3240  &A[0], n, &B[0], m, beta, &C1[0], m);
3241 
3242  Teuchos::BLAS<int,FadType> sacado_blas(false);
3243  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3244  &A[0], n, &B[0], m, beta, &C2[0], m);
3245 
3246  COMPARE_FAD_VECTORS(C1, C2, m*n);
3247 
3248  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3249  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3250  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3251  &A[0], n, &B[0], m, beta, &C3[0], m);
3252 
3253  COMPARE_FAD_VECTORS(C1, C3, m*n);
3254 
3255  // lower tri
3256  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3257  &A[0], n, &B[0], m, beta, &C1[0], m);
3258  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3259  &A[0], n, &B[0], m, beta, &C2[0], m);
3260  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3261  &A[0], n, &B[0], m, beta, &C3[0], m);
3262 
3263  COMPARE_FAD_VECTORS(C1, C2, m*n);
3264  COMPARE_FAD_VECTORS(C1, C3, m*n);
3265 }
3266 
3267 // Tests different lda, ldb, ldc, left side
3268 template <class FadType, class ScalarType>
3269 void
3272 
3273  // SYMM is apparently not defined in the BLAS for complex types
3275  return;
3276 
3277  unsigned int lda = m+4;
3278  unsigned int ldb = m+5;
3279  unsigned int ldc = m+6;
3280  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3281  C3(ldc*n,ndot);
3282  for (unsigned int j=0; j<m; j++) {
3283  for (unsigned int i=0; i<lda; i++) {
3284  A[i+j*lda] = FadType(ndot, urand.number());
3285  for (unsigned int k=0; k<ndot; k++)
3286  A[i+j*lda].fastAccessDx(k) = urand.number();
3287  }
3288  }
3289  for (unsigned int j=0; j<n; j++) {
3290  for (unsigned int i=0; i<ldb; i++) {
3291  B[i+j*ldb] = FadType(ndot, urand.number());
3292  for (unsigned int k=0; k<ndot; k++)
3293  B[i+j*ldb].fastAccessDx(k) = urand.number();
3294  }
3295  }
3296  FadType alpha(ndot, urand.number());
3297  FadType beta(ndot, urand.number());
3298  for (unsigned int k=0; k<ndot; k++) {
3299  alpha.fastAccessDx(k) = urand.number();
3300  beta.fastAccessDx(k) = urand.number();
3301  }
3302 
3303  for (unsigned int j=0; j<n; j++) {
3304  for (unsigned int i=0; i<ldc; i++) {
3305  ScalarType val = urand.number();
3306  C1[i+j*ldc] = FadType(ndot, val);
3307  C2[i+j*ldc] = FadType(ndot, val);
3308  C3[i+j*ldc] = FadType(ndot, val);
3309  for (unsigned int k=0; k<ndot; k++) {
3310  val = urand.number();
3311  C1[i+j*ldc].fastAccessDx(k) = val;
3312  C2[i+j*ldc].fastAccessDx(k) = val;
3313  C3[i+j*ldc].fastAccessDx(k) = val;
3314  }
3315  }
3316  }
3317 
3318  Teuchos::BLAS<int,FadType> teuchos_blas;
3319  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3320  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3321 
3322  Teuchos::BLAS<int,FadType> sacado_blas(false);
3323  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3324  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3325 
3326  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3327 
3328  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3329  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3330  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3331  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3332 
3333  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3334 
3335  // lower tri
3336  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3337  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3338  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3339  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3340  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3341  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3342 
3343  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3344  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3345 }
3346 
3347 // Tests different lda, ldb, ldc, right side
3348 template <class FadType, class ScalarType>
3349 void
3352 
3353  // SYMM is apparently not defined in the BLAS for complex types
3355  return;
3356 
3357  unsigned int lda = n+4;
3358  unsigned int ldb = m+5;
3359  unsigned int ldc = m+6;
3360  VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3361  C3(ldc*n,ndot);
3362  for (unsigned int j=0; j<n; j++) {
3363  for (unsigned int i=0; i<lda; i++) {
3364  A[i+j*lda] = FadType(ndot, urand.number());
3365  for (unsigned int k=0; k<ndot; k++)
3366  A[i+j*lda].fastAccessDx(k) = urand.number();
3367  }
3368  }
3369  for (unsigned int j=0; j<n; j++) {
3370  for (unsigned int i=0; i<ldb; i++) {
3371  B[i+j*ldb] = FadType(ndot, urand.number());
3372  for (unsigned int k=0; k<ndot; k++)
3373  B[i+j*ldb].fastAccessDx(k) = urand.number();
3374  }
3375  }
3376  FadType alpha(ndot, urand.number());
3377  FadType beta(ndot, urand.number());
3378  for (unsigned int k=0; k<ndot; k++) {
3379  alpha.fastAccessDx(k) = urand.number();
3380  beta.fastAccessDx(k) = urand.number();
3381  }
3382 
3383  for (unsigned int j=0; j<n; j++) {
3384  for (unsigned int i=0; i<ldc; i++) {
3385  ScalarType val = urand.number();
3386  C1[i+j*ldc] = FadType(ndot, val);
3387  C2[i+j*ldc] = FadType(ndot, val);
3388  C3[i+j*ldc] = FadType(ndot, val);
3389  for (unsigned int k=0; k<ndot; k++) {
3390  val = urand.number();
3391  C1[i+j*ldc].fastAccessDx(k) = val;
3392  C2[i+j*ldc].fastAccessDx(k) = val;
3393  C3[i+j*ldc].fastAccessDx(k) = val;
3394  }
3395  }
3396  }
3397 
3398  Teuchos::BLAS<int,FadType> teuchos_blas;
3399  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3400  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3401 
3402  Teuchos::BLAS<int,FadType> sacado_blas(false);
3403  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3404  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3405 
3406  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3407 
3408  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3409  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3410  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3411  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3412 
3413  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3414 
3415  // lower tri
3416  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3417  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3418  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3419  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3420  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3421  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3422 
3423  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3424  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3425 }
3426 
3427 // Tests with constant C
3428 template <class FadType, class ScalarType>
3429 void
3432 
3433  // SYMM is apparently not defined in the BLAS for complex types
3435  return;
3436 
3437  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3438  for (unsigned int j=0; j<m; j++) {
3439  for (unsigned int i=0; i<m; i++) {
3440  A[i+j*m] = FadType(ndot, urand.number());
3441  for (unsigned int k=0; k<ndot; k++)
3442  A[i+j*m].fastAccessDx(k) = urand.number();
3443  }
3444  }
3445  for (unsigned int j=0; j<n; j++) {
3446  for (unsigned int i=0; i<m; i++) {
3447  B[i+j*m] = FadType(ndot, urand.number());
3448  for (unsigned int k=0; k<ndot; k++)
3449  B[i+j*m].fastAccessDx(k) = urand.number();
3450  }
3451  }
3452  FadType alpha(ndot, urand.number());
3453  FadType beta(ndot, urand.number());
3454  for (unsigned int k=0; k<ndot; k++) {
3455  alpha.fastAccessDx(k) = urand.number();
3456  beta.fastAccessDx(k) = urand.number();
3457  }
3458 
3459  for (unsigned int j=0; j<n; j++) {
3460  for (unsigned int i=0; i<m; i++) {
3461  ScalarType val = urand.number();
3462  C1[i+j*m] = val;
3463  C2[i+j*m] = val;
3464  C3[i+j*m] = val;
3465  }
3466  }
3467 
3468  Teuchos::BLAS<int,FadType> teuchos_blas;
3469  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3470  &A[0], m, &B[0], m, beta, &C1[0], m);
3471 
3472  Teuchos::BLAS<int,FadType> sacado_blas(false);
3473  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3474  &A[0], m, &B[0], m, beta, &C2[0], m);
3475 
3476  COMPARE_FAD_VECTORS(C1, C2, m*n);
3477 
3478  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3479  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3480  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3481  &A[0], m, &B[0], m, beta, &C3[0], m);
3482 
3483  COMPARE_FAD_VECTORS(C1, C3, m*n);
3484 
3485  // lower tri
3486  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3487  &A[0], m, &B[0], m, beta, &C1[0], m);
3488  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3489  &A[0], m, &B[0], m, beta, &C2[0], m);
3490  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3491  &A[0], m, &B[0], m, beta, &C3[0], m);
3492 
3493  COMPARE_FAD_VECTORS(C1, C2, m*n);
3494  COMPARE_FAD_VECTORS(C1, C3, m*n);
3495 }
3496 
3497 // Tests with constant alpha, beta
3498 template <class FadType, class ScalarType>
3499 void
3502 
3503  // SYMM is apparently not defined in the BLAS for complex types
3505  return;
3506 
3507  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3508  for (unsigned int j=0; j<m; j++) {
3509  for (unsigned int i=0; i<m; i++) {
3510  A[i+j*m] = FadType(ndot, urand.number());
3511  for (unsigned int k=0; k<ndot; k++)
3512  A[i+j*m].fastAccessDx(k) = urand.number();
3513  }
3514  }
3515  for (unsigned int j=0; j<n; j++) {
3516  for (unsigned int i=0; i<m; i++) {
3517  B[i+j*m] = FadType(ndot, urand.number());
3518  for (unsigned int k=0; k<ndot; k++)
3519  B[i+j*m].fastAccessDx(k) = urand.number();
3520  }
3521  }
3522  ScalarType alpha = urand.number();
3523  ScalarType beta = urand.number();
3524 
3525  for (unsigned int j=0; j<n; j++) {
3526  for (unsigned int i=0; i<m; i++) {
3527  ScalarType val = urand.number();
3528  C1[i+j*m] = FadType(ndot, val);
3529  C2[i+j*m] = FadType(ndot, val);
3530  C3[i+j*m] = FadType(ndot, val);
3531  for (unsigned int k=0; k<ndot; k++) {
3532  val = urand.number();
3533  C1[i+j*m].fastAccessDx(k) = val;
3534  C2[i+j*m].fastAccessDx(k) = val;
3535  C3[i+j*m].fastAccessDx(k) = val;
3536  }
3537  }
3538  }
3539 
3540  Teuchos::BLAS<int,FadType> teuchos_blas;
3541  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3542  &A[0], m, &B[0], m, beta, &C1[0], m);
3543 
3544  Teuchos::BLAS<int,FadType> sacado_blas(false);
3545  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3546  &A[0], m, &B[0], m, beta, &C2[0], m);
3547 
3548  COMPARE_FAD_VECTORS(C1, C2, m*n);
3549 
3550  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3551  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3552  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3553  &A[0], m, &B[0], m, beta, &C3[0], m);
3554 
3555  COMPARE_FAD_VECTORS(C1, C3, m*n);
3556 
3557  // lower tri
3558  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3559  &A[0], m, &B[0], m, beta, &C1[0], m);
3560  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3561  &A[0], m, &B[0], m, beta, &C2[0], m);
3562  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3563  &A[0], m, &B[0], m, beta, &C3[0], m);
3564 
3565  COMPARE_FAD_VECTORS(C1, C2, m*n);
3566  COMPARE_FAD_VECTORS(C1, C3, m*n);
3567 }
3568 
3569 // Tests with constant A
3570 template <class FadType, class ScalarType>
3571 void
3574 
3575  // SYMM is apparently not defined in the BLAS for complex types
3577  return;
3578 
3579  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3580  C4(m*n,ndot), C5(m*n,ndot);
3581  std::vector<ScalarType> a(m*m);
3582  for (unsigned int j=0; j<m; j++) {
3583  for (unsigned int i=0; i<m; i++) {
3584  a[i+j*m] = urand.number();
3585  A[i+j*m] = a[i+j*m];
3586  }
3587  }
3588  for (unsigned int j=0; j<n; j++) {
3589  for (unsigned int i=0; i<m; i++) {
3590  B[i+j*m] = FadType(ndot, urand.number());
3591  for (unsigned int k=0; k<ndot; k++)
3592  B[i+j*m].fastAccessDx(k) = urand.number();
3593  }
3594  }
3595  FadType alpha(ndot, urand.number());
3596  FadType beta(ndot, urand.number());
3597  for (unsigned int k=0; k<ndot; k++) {
3598  alpha.fastAccessDx(k) = urand.number();
3599  beta.fastAccessDx(k) = urand.number();
3600  }
3601 
3602  for (unsigned int j=0; j<n; j++) {
3603  for (unsigned int i=0; i<m; i++) {
3604  ScalarType val = urand.number();
3605  C1[i+j*m] = FadType(ndot, val);
3606  C2[i+j*m] = FadType(ndot, val);
3607  C3[i+j*m] = FadType(ndot, val);
3608  C4[i+j*m] = FadType(ndot, val);
3609  C5[i+j*m] = FadType(ndot, val);
3610  for (unsigned int k=0; k<ndot; k++) {
3611  val = urand.number();
3612  C1[i+j*m].fastAccessDx(k) = val;
3613  C2[i+j*m].fastAccessDx(k) = val;
3614  C3[i+j*m].fastAccessDx(k) = val;
3615  C4[i+j*m].fastAccessDx(k) = val;
3616  C5[i+j*m].fastAccessDx(k) = val;
3617  }
3618  }
3619  }
3620 
3621  Teuchos::BLAS<int,FadType> teuchos_blas;
3622  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3623  &A[0], m, &B[0], m, beta, &C1[0], m);
3624 
3625  Teuchos::BLAS<int,FadType> sacado_blas(false);
3626  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3627  &A[0], m, &B[0], m, beta, &C2[0], m);
3628 
3629  COMPARE_FAD_VECTORS(C1, C2, m*n);
3630 
3631  unsigned int sz = m*m + 2*m*n*(1+ndot);
3632  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3633  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3634  &A[0], m, &B[0], m, beta, &C3[0], m);
3635 
3636  COMPARE_FAD_VECTORS(C1, C3, m*n);
3637 
3638  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3639  &a[0], m, &B[0], m, beta, &C4[0], m);
3640 
3641  COMPARE_FAD_VECTORS(C1, C4, m*n);
3642 
3643  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3644  &a[0], m, &B[0], m, beta, &C5[0], m);
3645 
3646  COMPARE_FAD_VECTORS(C1, C5, m*n);
3647 
3648  // lower tri
3649  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650  &A[0], m, &B[0], m, beta, &C1[0], m);
3651  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3652  &A[0], m, &B[0], m, beta, &C2[0], m);
3653  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3654  &A[0], m, &B[0], m, beta, &C3[0], m);
3655  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3656  &a[0], m, &B[0], m, beta, &C4[0], m);
3657  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3658  &a[0], m, &B[0], m, beta, &C5[0], m);
3659 
3660  COMPARE_FAD_VECTORS(C1, C2, m*n);
3661  COMPARE_FAD_VECTORS(C1, C3, m*n);
3662  COMPARE_FAD_VECTORS(C1, C4, m*n);
3663  COMPARE_FAD_VECTORS(C1, C5, m*n);
3664 }
3665 
3666 // Tests with constant B
3667 template <class FadType, class ScalarType>
3668 void
3671 
3672  // SYMM is apparently not defined in the BLAS for complex types
3674  return;
3675 
3676  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3677  C4(m*n,ndot), C5(m*n,ndot);
3678  std::vector<ScalarType> b(m*n);
3679  for (unsigned int j=0; j<m; j++) {
3680  for (unsigned int i=0; i<m; i++) {
3681  A[i+j*m] = FadType(ndot, urand.number());
3682  for (unsigned int k=0; k<ndot; k++)
3683  A[i+j*m].fastAccessDx(k) = urand.number();
3684  }
3685  }
3686  for (unsigned int j=0; j<n; j++) {
3687  for (unsigned int i=0; i<m; i++) {
3688  b[i+j*m] = urand.number();
3689  B[i+j*m] = b[i+j*m];
3690  }
3691  }
3692  FadType alpha(ndot, urand.number());
3693  FadType beta(ndot, urand.number());
3694  for (unsigned int k=0; k<ndot; k++) {
3695  alpha.fastAccessDx(k) = urand.number();
3696  beta.fastAccessDx(k) = urand.number();
3697  }
3698 
3699  for (unsigned int j=0; j<n; j++) {
3700  for (unsigned int i=0; i<m; i++) {
3701  ScalarType val = urand.number();
3702  C1[i+j*m] = FadType(ndot, val);
3703  C2[i+j*m] = FadType(ndot, val);
3704  C3[i+j*m] = FadType(ndot, val);
3705  C4[i+j*m] = FadType(ndot, val);
3706  C5[i+j*m] = FadType(ndot, val);
3707  for (unsigned int k=0; k<ndot; k++) {
3708  val = urand.number();
3709  C1[i+j*m].fastAccessDx(k) = val;
3710  C2[i+j*m].fastAccessDx(k) = val;
3711  C3[i+j*m].fastAccessDx(k) = val;
3712  C4[i+j*m].fastAccessDx(k) = val;
3713  C5[i+j*m].fastAccessDx(k) = val;
3714  }
3715  }
3716  }
3717 
3718  Teuchos::BLAS<int,FadType> teuchos_blas;
3719  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720  &A[0], m, &B[0], m, beta, &C1[0], m);
3721 
3722  Teuchos::BLAS<int,FadType> sacado_blas(false);
3723  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3724  &A[0], m, &B[0], m, beta, &C2[0], m);
3725 
3726  COMPARE_FAD_VECTORS(C1, C2, m*n);
3727 
3728  unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3729  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3730  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3731  &A[0], m, &B[0], m, beta, &C3[0], m);
3732 
3733  COMPARE_FAD_VECTORS(C1, C3, m*n);
3734 
3735  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3736  &A[0], m, &b[0], m, beta, &C4[0], m);
3737 
3738  COMPARE_FAD_VECTORS(C1, C4, m*n);
3739 
3740  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3741  &A[0], m, &b[0], m, beta, &C5[0], m);
3742 
3743  COMPARE_FAD_VECTORS(C1, C5, m*n);
3744 
3745  // lower tri
3746  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747  &A[0], m, &B[0], m, beta, &C1[0], m);
3748  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749  &A[0], m, &B[0], m, beta, &C2[0], m);
3750  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751  &A[0], m, &B[0], m, beta, &C3[0], m);
3752  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3753  &A[0], m, &b[0], m, beta, &C4[0], m);
3754  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3755  &A[0], m, &b[0], m, beta, &C5[0], m);
3756 
3757  COMPARE_FAD_VECTORS(C1, C2, m*n);
3758  COMPARE_FAD_VECTORS(C1, C3, m*n);
3759  COMPARE_FAD_VECTORS(C1, C4, m*n);
3760  COMPARE_FAD_VECTORS(C1, C5, m*n);
3761 }
3762 
3763 // Tests with constant A and B
3764 template <class FadType, class ScalarType>
3765 void
3768 
3769  // SYMM is apparently not defined in the BLAS for complex types
3771  return;
3772 
3773  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774  C4(m*n,ndot), C5(m*n,ndot);
3775  std::vector<ScalarType> a(m*m), b(m*n);
3776  for (unsigned int j=0; j<m; j++) {
3777  for (unsigned int i=0; i<m; i++) {
3778  a[i+j*m] = urand.number();
3779  A[i+j*m] = a[i+j*m];
3780  }
3781  }
3782  for (unsigned int j=0; j<n; j++) {
3783  for (unsigned int i=0; i<m; i++) {
3784  b[i+j*m] = urand.number();
3785  B[i+j*m] = b[i+j*m];
3786  }
3787  }
3788  FadType alpha(ndot, urand.number());
3789  FadType beta(ndot, urand.number());
3790  for (unsigned int k=0; k<ndot; k++) {
3791  alpha.fastAccessDx(k) = urand.number();
3792  beta.fastAccessDx(k) = urand.number();
3793  }
3794 
3795  for (unsigned int j=0; j<n; j++) {
3796  for (unsigned int i=0; i<m; i++) {
3797  ScalarType val = urand.number();
3798  C1[i+j*m] = FadType(ndot, val);
3799  C2[i+j*m] = FadType(ndot, val);
3800  C3[i+j*m] = FadType(ndot, val);
3801  C4[i+j*m] = FadType(ndot, val);
3802  C5[i+j*m] = FadType(ndot, val);
3803  for (unsigned int k=0; k<ndot; k++) {
3804  val = urand.number();
3805  C1[i+j*m].fastAccessDx(k) = val;
3806  C2[i+j*m].fastAccessDx(k) = val;
3807  C3[i+j*m].fastAccessDx(k) = val;
3808  C4[i+j*m].fastAccessDx(k) = val;
3809  C5[i+j*m].fastAccessDx(k) = val;
3810  }
3811  }
3812  }
3813 
3814  Teuchos::BLAS<int,FadType> teuchos_blas;
3815  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816  &A[0], m, &B[0], m, beta, &C1[0], m);
3817 
3818  Teuchos::BLAS<int,FadType> sacado_blas(false);
3819  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820  &A[0], m, &B[0], m, beta, &C2[0], m);
3821 
3822  COMPARE_FAD_VECTORS(C1, C2, m*n);
3823 
3824  unsigned int sz = m*m + m*n*(2+ndot);
3825  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3826  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827  &A[0], m, &B[0], m, beta, &C3[0], m);
3828 
3829  COMPARE_FAD_VECTORS(C1, C3, m*n);
3830 
3831  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832  &a[0], m, &b[0], m, beta, &C4[0], m);
3833 
3834  COMPARE_FAD_VECTORS(C1, C4, m*n);
3835 
3836  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837  &a[0], m, &b[0], m, beta, &C5[0], m);
3838 
3839  COMPARE_FAD_VECTORS(C1, C5, m*n);
3840 
3841  // lower tri
3842  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843  &A[0], m, &B[0], m, beta, &C1[0], m);
3844  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845  &A[0], m, &B[0], m, beta, &C2[0], m);
3846  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847  &A[0], m, &B[0], m, beta, &C3[0], m);
3848  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849  &a[0], m, &b[0], m, beta, &C4[0], m);
3850  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851  &a[0], m, &b[0], m, beta, &C5[0], m);
3852 
3853  COMPARE_FAD_VECTORS(C1, C2, m*n);
3854  COMPARE_FAD_VECTORS(C1, C3, m*n);
3855  COMPARE_FAD_VECTORS(C1, C4, m*n);
3856  COMPARE_FAD_VECTORS(C1, C5, m*n);
3857 }
3858 
3859 // Tests all arguments, left side
3860 template <class FadType, class ScalarType>
3861 void
3864  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3865  for (unsigned int j=0; j<m; j++) {
3866  for (unsigned int i=0; i<m; i++) {
3867  A[i+j*m] = FadType(ndot, urand.number());
3868  for (unsigned int k=0; k<ndot; k++)
3869  A[i+j*m].fastAccessDx(k) = urand.number();
3870  }
3871  }
3872  FadType alpha(ndot, urand.number());
3873  for (unsigned int k=0; k<ndot; k++) {
3874  alpha.fastAccessDx(k) = urand.number();
3875  }
3876 
3877  for (unsigned int j=0; j<n; j++) {
3878  for (unsigned int i=0; i<m; i++) {
3879  ScalarType val = urand.number();
3880  B1[i+j*m] = FadType(ndot, val);
3881  B2[i+j*m] = FadType(ndot, val);
3882  B3[i+j*m] = FadType(ndot, val);
3883  for (unsigned int k=0; k<ndot; k++) {
3884  val = urand.number();
3885  B1[i+j*m].fastAccessDx(k) = val;
3886  B2[i+j*m].fastAccessDx(k) = val;
3887  B3[i+j*m].fastAccessDx(k) = val;
3888  }
3889  }
3890  }
3891 
3892  Teuchos::BLAS<int,FadType> teuchos_blas;
3894  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3895 
3896  Teuchos::BLAS<int,FadType> sacado_blas(false);
3898  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3899 
3900  COMPARE_FAD_VECTORS(B1, B2, m*n);
3901 
3902  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3903  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3905  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3906 
3907  COMPARE_FAD_VECTORS(B1, B3, m*n);
3908 
3910  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3912  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3914  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3915  COMPARE_FAD_VECTORS(B1, B2, m*n);
3916  COMPARE_FAD_VECTORS(B1, B3, m*n);
3917 
3919  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3921  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3923  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3924  COMPARE_FAD_VECTORS(B1, B2, m*n);
3925  COMPARE_FAD_VECTORS(B1, B3, m*n);
3926 
3927  for (unsigned int i=0; i<m; i++) {
3928  A[i*m+i].val() = 1.0;
3929  for (unsigned int k=0; k<ndot; k++)
3930  A[i*m+i].fastAccessDx(k) = 0.0;
3931  }
3933  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3935  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3937  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3938  COMPARE_FAD_VECTORS(B1, B2, m*n);
3939  COMPARE_FAD_VECTORS(B1, B3, m*n);
3940 }
3941 
3942 // Tests all arguments, right side
3943 template <class FadType, class ScalarType>
3944 void
3947  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3948  for (unsigned int j=0; j<n; j++) {
3949  for (unsigned int i=0; i<n; i++) {
3950  A[i+j*n] = FadType(ndot, urand.number());
3951  for (unsigned int k=0; k<ndot; k++)
3952  A[i+j*n].fastAccessDx(k) = urand.number();
3953  }
3954  }
3955  FadType alpha(ndot, urand.number());
3956  for (unsigned int k=0; k<ndot; k++) {
3957  alpha.fastAccessDx(k) = urand.number();
3958  }
3959 
3960  for (unsigned int j=0; j<n; j++) {
3961  for (unsigned int i=0; i<m; i++) {
3962  ScalarType val = urand.number();
3963  B1[i+j*m] = FadType(ndot, val);
3964  B2[i+j*m] = FadType(ndot, val);
3965  B3[i+j*m] = FadType(ndot, val);
3966  for (unsigned int k=0; k<ndot; k++) {
3967  val = urand.number();
3968  B1[i+j*m].fastAccessDx(k) = val;
3969  B2[i+j*m].fastAccessDx(k) = val;
3970  B3[i+j*m].fastAccessDx(k) = val;
3971  }
3972  }
3973  }
3974 
3975  Teuchos::BLAS<int,FadType> teuchos_blas;
3977  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3978 
3979  Teuchos::BLAS<int,FadType> sacado_blas(false);
3981  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3982 
3983  COMPARE_FAD_VECTORS(B1, B2, m*n);
3984 
3985  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3986  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3988  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3989 
3990  COMPARE_FAD_VECTORS(B1, B3, m*n);
3991 
3993  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3995  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3997  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3998  COMPARE_FAD_VECTORS(B1, B2, m*n);
3999  COMPARE_FAD_VECTORS(B1, B3, m*n);
4000 
4002  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4004  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4006  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4007  COMPARE_FAD_VECTORS(B1, B2, m*n);
4008  COMPARE_FAD_VECTORS(B1, B3, m*n);
4009 
4010  for (unsigned int i=0; i<n; i++) {
4011  A[i*n+i].val() = 1.0;
4012  for (unsigned int k=0; k<ndot; k++)
4013  A[i*n+i].fastAccessDx(k) = 0.0;
4014  }
4016  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4018  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4020  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4021  COMPARE_FAD_VECTORS(B1, B2, m*n);
4022  COMPARE_FAD_VECTORS(B1, B3, m*n);
4023 }
4024 
4025 // Tests all arguments, left side, different lda, ldb
4026 template <class FadType, class ScalarType>
4027 void
4030  unsigned int lda = m+4;
4031  unsigned int ldb = m+5;
4032  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4033  for (unsigned int j=0; j<m; j++) {
4034  for (unsigned int i=0; i<lda; i++) {
4035  A[i+j*lda] = FadType(ndot, urand.number());
4036  for (unsigned int k=0; k<ndot; k++)
4037  A[i+j*lda].fastAccessDx(k) = urand.number();
4038  }
4039  }
4040  FadType alpha(ndot, urand.number());
4041  for (unsigned int k=0; k<ndot; k++) {
4042  alpha.fastAccessDx(k) = urand.number();
4043  }
4044 
4045  for (unsigned int j=0; j<n; j++) {
4046  for (unsigned int i=0; i<ldb; i++) {
4047  ScalarType val = urand.number();
4048  B1[i+j*ldb] = FadType(ndot, val);
4049  B2[i+j*ldb] = FadType(ndot, val);
4050  B3[i+j*ldb] = FadType(ndot, val);
4051  for (unsigned int k=0; k<ndot; k++) {
4052  val = urand.number();
4053  B1[i+j*ldb].fastAccessDx(k) = val;
4054  B2[i+j*ldb].fastAccessDx(k) = val;
4055  B3[i+j*ldb].fastAccessDx(k) = val;
4056  }
4057  }
4058  }
4059 
4060  Teuchos::BLAS<int,FadType> teuchos_blas;
4062  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4063 
4064  Teuchos::BLAS<int,FadType> sacado_blas(false);
4066  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4067 
4068  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4069 
4070  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4071  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4073  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4074 
4075  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4076 
4078  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4080  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4082  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4083  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4084  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4085 
4087  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4089  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4091  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4092  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4093  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4094 
4095  for (unsigned int i=0; i<m; i++) {
4096  A[i*lda+i].val() = 1.0;
4097  for (unsigned int k=0; k<ndot; k++)
4098  A[i*lda+i].fastAccessDx(k) = 0.0;
4099  }
4101  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4103  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4105  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4106  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4107  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4108 }
4109 
4110 // Tests all arguments, right side, different lda, ldb
4111 template <class FadType, class ScalarType>
4112 void
4115  unsigned int lda = n+4;
4116  unsigned int ldb = m+5;
4117  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4118  for (unsigned int j=0; j<n; j++) {
4119  for (unsigned int i=0; i<lda; i++) {
4120  A[i+j*lda] = FadType(ndot, urand.number());
4121  for (unsigned int k=0; k<ndot; k++)
4122  A[i+j*lda].fastAccessDx(k) = urand.number();
4123  }
4124  }
4125  FadType alpha(ndot, urand.number());
4126  for (unsigned int k=0; k<ndot; k++) {
4127  alpha.fastAccessDx(k) = urand.number();
4128  }
4129 
4130  for (unsigned int j=0; j<n; j++) {
4131  for (unsigned int i=0; i<ldb; i++) {
4132  ScalarType val = urand.number();
4133  B1[i+j*ldb] = FadType(ndot, val);
4134  B2[i+j*ldb] = FadType(ndot, val);
4135  B3[i+j*ldb] = FadType(ndot, val);
4136  for (unsigned int k=0; k<ndot; k++) {
4137  val = urand.number();
4138  B1[i+j*ldb].fastAccessDx(k) = val;
4139  B2[i+j*ldb].fastAccessDx(k) = val;
4140  B3[i+j*ldb].fastAccessDx(k) = val;
4141  }
4142  }
4143  }
4144 
4145  Teuchos::BLAS<int,FadType> teuchos_blas;
4147  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4148 
4149  Teuchos::BLAS<int,FadType> sacado_blas(false);
4151  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4152 
4153  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4154 
4155  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4156  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4158  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4159 
4160  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4161 
4163  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4165  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4167  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4168  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4169  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4170 
4172  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4174  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4176  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4177  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4178  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4179 
4180  for (unsigned int i=0; i<n; i++) {
4181  A[i*lda+i].val() = 1.0;
4182  for (unsigned int k=0; k<ndot; k++)
4183  A[i*lda+i].fastAccessDx(k) = 0.0;
4184  }
4186  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4188  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4190  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4191  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4192  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4193 }
4194 
4195 // Tests constant alpha
4196 template <class FadType, class ScalarType>
4197 void
4200  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4201  for (unsigned int j=0; j<m; j++) {
4202  for (unsigned int i=0; i<m; i++) {
4203  A[i+j*m] = FadType(ndot, urand.number());
4204  for (unsigned int k=0; k<ndot; k++)
4205  A[i+j*m].fastAccessDx(k) = urand.number();
4206  }
4207  }
4208  ScalarType alpha = urand.number();
4209 
4210  for (unsigned int j=0; j<n; j++) {
4211  for (unsigned int i=0; i<m; i++) {
4212  ScalarType val = urand.number();
4213  B1[i+j*m] = FadType(ndot, val);
4214  B2[i+j*m] = FadType(ndot, val);
4215  B3[i+j*m] = FadType(ndot, val);
4216  for (unsigned int k=0; k<ndot; k++) {
4217  val = urand.number();
4218  B1[i+j*m].fastAccessDx(k) = val;
4219  B2[i+j*m].fastAccessDx(k) = val;
4220  B3[i+j*m].fastAccessDx(k) = val;
4221  }
4222  }
4223  }
4224 
4225  Teuchos::BLAS<int,FadType> teuchos_blas;
4227  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4228 
4229  Teuchos::BLAS<int,FadType> sacado_blas(false);
4231  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4232 
4233  COMPARE_FAD_VECTORS(B1, B2, m*n);
4234 
4235  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4236  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4238  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4239 
4240  COMPARE_FAD_VECTORS(B1, B3, m*n);
4241 
4243  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4245  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4247  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4248  COMPARE_FAD_VECTORS(B1, B2, m*n);
4249  COMPARE_FAD_VECTORS(B1, B3, m*n);
4250 
4252  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4254  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4256  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4257  COMPARE_FAD_VECTORS(B1, B2, m*n);
4258  COMPARE_FAD_VECTORS(B1, B3, m*n);
4259 
4260  for (unsigned int i=0; i<m; i++) {
4261  A[i*m+i].val() = 1.0;
4262  for (unsigned int k=0; k<ndot; k++)
4263  A[i*m+i].fastAccessDx(k) = 0.0;
4264  }
4266  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4268  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4270  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4271  COMPARE_FAD_VECTORS(B1, B2, m*n);
4272  COMPARE_FAD_VECTORS(B1, B3, m*n);
4273 }
4274 
4275 // Tests constant B
4276 template <class FadType, class ScalarType>
4277 void
4280  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4281  for (unsigned int j=0; j<m; j++) {
4282  for (unsigned int i=0; i<m; i++) {
4283  A[i+j*m] = FadType(ndot, urand.number());
4284  for (unsigned int k=0; k<ndot; k++)
4285  A[i+j*m].fastAccessDx(k) = urand.number();
4286  }
4287  }
4288  FadType alpha(ndot, urand.number());
4289  for (unsigned int k=0; k<ndot; k++) {
4290  alpha.fastAccessDx(k) = urand.number();
4291  }
4292 
4293  for (unsigned int j=0; j<n; j++) {
4294  for (unsigned int i=0; i<m; i++) {
4295  ScalarType val = urand.number();
4296  B1[i+j*m] = val;
4297  B2[i+j*m] = val;
4298  B3[i+j*m] = val;
4299  }
4300  }
4301 
4302  Teuchos::BLAS<int,FadType> teuchos_blas;
4304  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4305 
4306  Teuchos::BLAS<int,FadType> sacado_blas(false);
4308  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4309 
4310  COMPARE_FAD_VECTORS(B1, B2, m*n);
4311 
4312  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4313  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4315  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4316 
4317  COMPARE_FAD_VECTORS(B1, B3, m*n);
4318 
4320  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4322  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4324  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4325  COMPARE_FAD_VECTORS(B1, B2, m*n);
4326  COMPARE_FAD_VECTORS(B1, B3, m*n);
4327 
4329  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4331  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4333  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4334  COMPARE_FAD_VECTORS(B1, B2, m*n);
4335  COMPARE_FAD_VECTORS(B1, B3, m*n);
4336 
4337  for (unsigned int i=0; i<m; i++) {
4338  A[i*m+i].val() = 1.0;
4339  for (unsigned int k=0; k<ndot; k++)
4340  A[i*m+i].fastAccessDx(k) = 0.0;
4341  }
4343  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4345  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4347  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4348  COMPARE_FAD_VECTORS(B1, B2, m*n);
4349  COMPARE_FAD_VECTORS(B1, B3, m*n);
4350 }
4351 
4352 // Tests constant A
4353 template <class FadType, class ScalarType>
4354 void
4357  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4358  B4(m*n,ndot), B5(m*n,ndot);
4359  std::vector<ScalarType> a(m*m);
4360  for (unsigned int j=0; j<m; j++) {
4361  for (unsigned int i=0; i<m; i++) {
4362  a[i+j*m] = urand.number();
4363  A[i+j*m] = a[i+j*m];
4364  }
4365  }
4366  FadType alpha(ndot, urand.number());
4367  for (unsigned int k=0; k<ndot; k++) {
4368  alpha.fastAccessDx(k) = urand.number();
4369  }
4370 
4371  for (unsigned int j=0; j<n; j++) {
4372  for (unsigned int i=0; i<m; i++) {
4373  ScalarType val = urand.number();
4374  B1[i+j*m] = FadType(ndot, val);
4375  B2[i+j*m] = FadType(ndot, val);
4376  B3[i+j*m] = FadType(ndot, val);
4377  B4[i+j*m] = FadType(ndot, val);
4378  B5[i+j*m] = FadType(ndot, val);
4379  for (unsigned int k=0; k<ndot; k++) {
4380  val = urand.number();
4381  B1[i+j*m].fastAccessDx(k) = val;
4382  B2[i+j*m].fastAccessDx(k) = val;
4383  B3[i+j*m].fastAccessDx(k) = val;
4384  B4[i+j*m].fastAccessDx(k) = val;
4385  B5[i+j*m].fastAccessDx(k) = val;
4386  }
4387  }
4388  }
4389 
4390  Teuchos::BLAS<int,FadType> teuchos_blas;
4392  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4393 
4394  Teuchos::BLAS<int,FadType> sacado_blas(false);
4396  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4397 
4398  COMPARE_FAD_VECTORS(B1, B2, m*n);
4399 
4400  unsigned int sz = m*m + m*n*(1+ndot);
4401  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4403  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4404 
4405  COMPARE_FAD_VECTORS(B1, B3, m*n);
4406 
4408  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4409 
4410  COMPARE_FAD_VECTORS(B1, B4, m*n);
4411 
4413  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4414 
4415  COMPARE_FAD_VECTORS(B1, B5, m*n);
4416 
4418  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4420  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4422  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4424  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4426  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4427  COMPARE_FAD_VECTORS(B1, B2, m*n);
4428  COMPARE_FAD_VECTORS(B1, B3, m*n);
4429  COMPARE_FAD_VECTORS(B1, B4, m*n);
4430  COMPARE_FAD_VECTORS(B1, B5, m*n);
4431 
4433  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4435  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4437  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4439  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4441  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4442  COMPARE_FAD_VECTORS(B1, B2, m*n);
4443  COMPARE_FAD_VECTORS(B1, B3, m*n);
4444  COMPARE_FAD_VECTORS(B1, B4, m*n);
4445  COMPARE_FAD_VECTORS(B1, B5, m*n);
4446 
4447  for (unsigned int i=0; i<m; i++) {
4448  A[i*m+i].val() = 1.0;
4449  for (unsigned int k=0; k<ndot; k++)
4450  A[i*m+i].fastAccessDx(k) = 0.0;
4451  }
4453  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4455  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4457  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4459  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4461  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4462  COMPARE_FAD_VECTORS(B1, B2, m*n);
4463  COMPARE_FAD_VECTORS(B1, B3, m*n);
4464  COMPARE_FAD_VECTORS(B1, B4, m*n);
4465  COMPARE_FAD_VECTORS(B1, B5, m*n);
4466 }
4467 
4468 // Tests all arguments, left side
4469 template <class FadType, class ScalarType>
4470 void
4473  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4474  for (unsigned int j=0; j<m; j++) {
4475  for (unsigned int i=0; i<m; i++) {
4476  //A[i+j*m] = urand.number();
4477  A[i+j*m] = FadType(ndot, urand.number());
4478  for (unsigned int k=0; k<ndot; k++)
4479  A[i+j*m].fastAccessDx(k) = urand.number();
4480  }
4481  }
4482  FadType alpha(ndot, urand.number());
4483  for (unsigned int k=0; k<ndot; k++) {
4484  alpha.fastAccessDx(k) = urand.number();
4485  }
4486  //ScalarType alpha = urand.number();
4487 
4488  for (unsigned int j=0; j<n; j++) {
4489  for (unsigned int i=0; i<m; i++) {
4490  ScalarType val = urand.number();
4491  // B1[i+j*m] = val;
4492  // B2[i+j*m] = val;
4493  // B3[i+j*m] = val;
4494  B1[i+j*m] = FadType(ndot, val);
4495  B2[i+j*m] = FadType(ndot, val);
4496  B3[i+j*m] = FadType(ndot, val);
4497  for (unsigned int k=0; k<ndot; k++) {
4498  val = urand.number();
4499  B1[i+j*m].fastAccessDx(k) = val;
4500  B2[i+j*m].fastAccessDx(k) = val;
4501  B3[i+j*m].fastAccessDx(k) = val;
4502  }
4503  }
4504  }
4505 
4506  Teuchos::BLAS<int,FadType> teuchos_blas;
4508  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4509 
4510  Teuchos::BLAS<int,FadType> sacado_blas(false);
4512  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4513 
4514  COMPARE_FAD_VECTORS(B1, B2, m*n);
4515 
4516  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4517  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4519  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4520 
4521  COMPARE_FAD_VECTORS(B1, B3, m*n);
4522 
4524  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4526  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4528  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4529  COMPARE_FAD_VECTORS(B1, B2, m*n);
4530  COMPARE_FAD_VECTORS(B1, B3, m*n);
4531 
4533  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4535  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4537  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4538  COMPARE_FAD_VECTORS(B1, B2, m*n);
4539  COMPARE_FAD_VECTORS(B1, B3, m*n);
4540 
4541  for (unsigned int i=0; i<m; i++) {
4542  A[i*m+i].val() = 1.0;
4543  for (unsigned int k=0; k<ndot; k++)
4544  A[i*m+i].fastAccessDx(k) = 0.0;
4545  }
4547  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4549  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4551  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4552  COMPARE_FAD_VECTORS(B1, B2, m*n);
4553  COMPARE_FAD_VECTORS(B1, B3, m*n);
4554 }
4555 
4556 // Tests all arguments, right side
4557 template <class FadType, class ScalarType>
4558 void
4561  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4562  for (unsigned int j=0; j<n; j++) {
4563  for (unsigned int i=0; i<n; i++) {
4564  A[i+j*n] = FadType(ndot, urand.number());
4565  for (unsigned int k=0; k<ndot; k++)
4566  A[i+j*n].fastAccessDx(k) = urand.number();
4567  }
4568  }
4569  FadType alpha(ndot, urand.number());
4570  for (unsigned int k=0; k<ndot; k++) {
4571  alpha.fastAccessDx(k) = urand.number();
4572  }
4573 
4574  for (unsigned int j=0; j<n; j++) {
4575  for (unsigned int i=0; i<m; i++) {
4576  ScalarType val = urand.number();
4577  B1[i+j*m] = FadType(ndot, val);
4578  B2[i+j*m] = FadType(ndot, val);
4579  B3[i+j*m] = FadType(ndot, val);
4580  for (unsigned int k=0; k<ndot; k++) {
4581  val = urand.number();
4582  B1[i+j*m].fastAccessDx(k) = val;
4583  B2[i+j*m].fastAccessDx(k) = val;
4584  B3[i+j*m].fastAccessDx(k) = val;
4585  }
4586  }
4587  }
4588 
4589  Teuchos::BLAS<int,FadType> teuchos_blas;
4591  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4592 
4593  Teuchos::BLAS<int,FadType> sacado_blas(false);
4595  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4596 
4597  COMPARE_FAD_VECTORS(B1, B2, m*n);
4598 
4599  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4600  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4602  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4603 
4604  COMPARE_FAD_VECTORS(B1, B3, m*n);
4605 
4607  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4609  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4611  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4612  COMPARE_FAD_VECTORS(B1, B2, m*n);
4613  COMPARE_FAD_VECTORS(B1, B3, m*n);
4614 
4616  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4618  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4620  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4621  COMPARE_FAD_VECTORS(B1, B2, m*n);
4622  COMPARE_FAD_VECTORS(B1, B3, m*n);
4623 
4624  for (unsigned int i=0; i<n; i++) {
4625  A[i*n+i].val() = 1.0;
4626  for (unsigned int k=0; k<ndot; k++)
4627  A[i*n+i].fastAccessDx(k) = 0.0;
4628  }
4630  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4632  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4634  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4635  COMPARE_FAD_VECTORS(B1, B2, m*n);
4636  COMPARE_FAD_VECTORS(B1, B3, m*n);
4637 }
4638 
4639 // Tests all arguments, left side, different lda, ldb
4640 template <class FadType, class ScalarType>
4641 void
4644  unsigned int lda = m+4;
4645  unsigned int ldb = m+5;
4646  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4647  for (unsigned int j=0; j<m; j++) {
4648  for (unsigned int i=0; i<lda; i++) {
4649  A[i+j*lda] = FadType(ndot, urand.number());
4650  for (unsigned int k=0; k<ndot; k++)
4651  A[i+j*lda].fastAccessDx(k) = urand.number();
4652  }
4653  }
4654  FadType alpha(ndot, urand.number());
4655  for (unsigned int k=0; k<ndot; k++) {
4656  alpha.fastAccessDx(k) = urand.number();
4657  }
4658 
4659  for (unsigned int j=0; j<n; j++) {
4660  for (unsigned int i=0; i<ldb; i++) {
4661  ScalarType val = urand.number();
4662  B1[i+j*ldb] = FadType(ndot, val);
4663  B2[i+j*ldb] = FadType(ndot, val);
4664  B3[i+j*ldb] = FadType(ndot, val);
4665  for (unsigned int k=0; k<ndot; k++) {
4666  val = urand.number();
4667  B1[i+j*ldb].fastAccessDx(k) = val;
4668  B2[i+j*ldb].fastAccessDx(k) = val;
4669  B3[i+j*ldb].fastAccessDx(k) = val;
4670  }
4671  }
4672  }
4673 
4674  Teuchos::BLAS<int,FadType> teuchos_blas;
4676  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4677 
4678  Teuchos::BLAS<int,FadType> sacado_blas(false);
4680  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4681 
4682  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4683 
4684  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4685  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4687  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4688 
4689  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4690 
4692  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4694  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4696  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4697  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4698  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4699 
4701  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4703  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4705  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4706  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4707  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4708 
4709  for (unsigned int i=0; i<m; i++) {
4710  A[i*lda+i].val() = 1.0;
4711  for (unsigned int k=0; k<ndot; k++)
4712  A[i*lda+i].fastAccessDx(k) = 0.0;
4713  }
4715  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4717  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4719  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4720  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4721  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4722 }
4723 
4724 // Tests all arguments, right side, different lda, ldb
4725 template <class FadType, class ScalarType>
4726 void
4729  unsigned int lda = n+4;
4730  unsigned int ldb = m+5;
4731  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4732  for (unsigned int j=0; j<n; j++) {
4733  for (unsigned int i=0; i<lda; i++) {
4734  A[i+j*lda] = FadType(ndot, urand.number());
4735  for (unsigned int k=0; k<ndot; k++)
4736  A[i+j*lda].fastAccessDx(k) = urand.number();
4737  }
4738  }
4739  FadType alpha(ndot, urand.number());
4740  for (unsigned int k=0; k<ndot; k++) {
4741  alpha.fastAccessDx(k) = urand.number();
4742  }
4743 
4744  for (unsigned int j=0; j<n; j++) {
4745  for (unsigned int i=0; i<ldb; i++) {
4746  ScalarType val = urand.number();
4747  B1[i+j*ldb] = FadType(ndot, val);
4748  B2[i+j*ldb] = FadType(ndot, val);
4749  B3[i+j*ldb] = FadType(ndot, val);
4750  for (unsigned int k=0; k<ndot; k++) {
4751  val = urand.number();
4752  B1[i+j*ldb].fastAccessDx(k) = val;
4753  B2[i+j*ldb].fastAccessDx(k) = val;
4754  B3[i+j*ldb].fastAccessDx(k) = val;
4755  }
4756  }
4757  }
4758 
4759  Teuchos::BLAS<int,FadType> teuchos_blas;
4761  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4762 
4763  Teuchos::BLAS<int,FadType> sacado_blas(false);
4765  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4766 
4767  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4768 
4769  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4770  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4772  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4773 
4774  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4775 
4777  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4779  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4781  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4782  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4783  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4784 
4786  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4788  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4790  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4791  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4792  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4793 
4794  for (unsigned int i=0; i<n; i++) {
4795  A[i*lda+i].val() = 1.0;
4796  for (unsigned int k=0; k<ndot; k++)
4797  A[i*lda+i].fastAccessDx(k) = 0.0;
4798  }
4800  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4802  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4804  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4805  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4806  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4807 }
4808 
4809 // Tests constant alpha
4810 template <class FadType, class ScalarType>
4811 void
4814  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4815  for (unsigned int j=0; j<m; j++) {
4816  for (unsigned int i=0; i<m; i++) {
4817  A[i+j*m] = FadType(ndot, urand.number());
4818  for (unsigned int k=0; k<ndot; k++)
4819  A[i+j*m].fastAccessDx(k) = urand.number();
4820  }
4821  }
4822  ScalarType alpha = urand.number();
4823 
4824  for (unsigned int j=0; j<n; j++) {
4825  for (unsigned int i=0; i<m; i++) {
4826  ScalarType val = urand.number();
4827  B1[i+j*m] = FadType(ndot, val);
4828  B2[i+j*m] = FadType(ndot, val);
4829  B3[i+j*m] = FadType(ndot, val);
4830  for (unsigned int k=0; k<ndot; k++) {
4831  val = urand.number();
4832  B1[i+j*m].fastAccessDx(k) = val;
4833  B2[i+j*m].fastAccessDx(k) = val;
4834  B3[i+j*m].fastAccessDx(k) = val;
4835  }
4836  }
4837  }
4838 
4839  Teuchos::BLAS<int,FadType> teuchos_blas;
4841  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4842 
4843  Teuchos::BLAS<int,FadType> sacado_blas(false);
4845  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4846 
4847  COMPARE_FAD_VECTORS(B1, B2, m*n);
4848 
4849  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4850  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4852  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4853 
4854  COMPARE_FAD_VECTORS(B1, B3, m*n);
4855 
4857  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4859  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4861  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4862  COMPARE_FAD_VECTORS(B1, B2, m*n);
4863  COMPARE_FAD_VECTORS(B1, B3, m*n);
4864 
4866  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4868  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4870  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4871  COMPARE_FAD_VECTORS(B1, B2, m*n);
4872  COMPARE_FAD_VECTORS(B1, B3, m*n);
4873 
4874  for (unsigned int i=0; i<m; i++) {
4875  A[i*m+i].val() = 1.0;
4876  for (unsigned int k=0; k<ndot; k++)
4877  A[i*m+i].fastAccessDx(k) = 0.0;
4878  }
4880  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4882  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4884  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4885  COMPARE_FAD_VECTORS(B1, B2, m*n);
4886  COMPARE_FAD_VECTORS(B1, B3, m*n);
4887 }
4888 
4889 // Tests constant B
4890 template <class FadType, class ScalarType>
4891 void
4894  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4895  for (unsigned int j=0; j<m; j++) {
4896  for (unsigned int i=0; i<m; i++) {
4897  A[i+j*m] = FadType(ndot, urand.number());
4898  for (unsigned int k=0; k<ndot; k++)
4899  A[i+j*m].fastAccessDx(k) = urand.number();
4900  }
4901  }
4902  FadType alpha(ndot, urand.number());
4903  for (unsigned int k=0; k<ndot; k++) {
4904  alpha.fastAccessDx(k) = urand.number();
4905  }
4906 
4907  for (unsigned int j=0; j<n; j++) {
4908  for (unsigned int i=0; i<m; i++) {
4909  ScalarType val = urand.number();
4910  B1[i+j*m] = val;
4911  B2[i+j*m] = val;
4912  B3[i+j*m] = val;
4913  }
4914  }
4915 
4916  Teuchos::BLAS<int,FadType> teuchos_blas;
4918  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4919 
4920  Teuchos::BLAS<int,FadType> sacado_blas(false);
4922  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4923 
4924  COMPARE_FAD_VECTORS(B1, B2, m*n);
4925 
4926  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4927  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4929  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4930 
4931  COMPARE_FAD_VECTORS(B1, B3, m*n);
4932 
4934  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4936  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4938  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4939  COMPARE_FAD_VECTORS(B1, B2, m*n);
4940  COMPARE_FAD_VECTORS(B1, B3, m*n);
4941 
4943  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4945  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4947  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4948  COMPARE_FAD_VECTORS(B1, B2, m*n);
4949  COMPARE_FAD_VECTORS(B1, B3, m*n);
4950 
4951  for (unsigned int i=0; i<m; i++) {
4952  A[i*m+i].val() = 1.0;
4953  for (unsigned int k=0; k<ndot; k++)
4954  A[i*m+i].fastAccessDx(k) = 0.0;
4955  }
4957  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4959  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4961  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4962  COMPARE_FAD_VECTORS(B1, B2, m*n);
4963  COMPARE_FAD_VECTORS(B1, B3, m*n);
4964 }
4965 
4966 // Tests constant A
4967 template <class FadType, class ScalarType>
4968 void
4971  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4972  B4(m*n,ndot), B5(m*n,ndot);
4973  std::vector<ScalarType> a(m*m);
4974  for (unsigned int j=0; j<m; j++) {
4975  for (unsigned int i=0; i<m; i++) {
4976  a[i+j*m] = urand.number();
4977  A[i+j*m] = a[i+j*m];
4978  }
4979  }
4980  FadType alpha(ndot, urand.number());
4981  for (unsigned int k=0; k<ndot; k++) {
4982  alpha.fastAccessDx(k) = urand.number();
4983  }
4984 
4985  for (unsigned int j=0; j<n; j++) {
4986  for (unsigned int i=0; i<m; i++) {
4987  ScalarType val = urand.number();
4988  B1[i+j*m] = FadType(ndot, val);
4989  B2[i+j*m] = FadType(ndot, val);
4990  B3[i+j*m] = FadType(ndot, val);
4991  B4[i+j*m] = FadType(ndot, val);
4992  B5[i+j*m] = FadType(ndot, val);
4993  for (unsigned int k=0; k<ndot; k++) {
4994  val = urand.number();
4995  B1[i+j*m].fastAccessDx(k) = val;
4996  B2[i+j*m].fastAccessDx(k) = val;
4997  B3[i+j*m].fastAccessDx(k) = val;
4998  B4[i+j*m].fastAccessDx(k) = val;
4999  B5[i+j*m].fastAccessDx(k) = val;
5000  }
5001  }
5002  }
5003 
5004  Teuchos::BLAS<int,FadType> teuchos_blas;
5006  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5007 
5008  Teuchos::BLAS<int,FadType> sacado_blas(false);
5010  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5011 
5012  COMPARE_FAD_VECTORS(B1, B2, m*n);
5013 
5014  unsigned int sz = m*m + m*n*(1+ndot);
5015  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
5017  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5018 
5019  COMPARE_FAD_VECTORS(B1, B3, m*n);
5020 
5022  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5023 
5024  COMPARE_FAD_VECTORS(B1, B4, m*n);
5025 
5027  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5028 
5029  COMPARE_FAD_VECTORS(B1, B5, m*n);
5030 
5032  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5034  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5036  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5038  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5040  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5041  COMPARE_FAD_VECTORS(B1, B2, m*n);
5042  COMPARE_FAD_VECTORS(B1, B3, m*n);
5043  COMPARE_FAD_VECTORS(B1, B4, m*n);
5044  COMPARE_FAD_VECTORS(B1, B5, m*n);
5045 
5047  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5049  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5051  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5053  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5055  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5056  COMPARE_FAD_VECTORS(B1, B2, m*n);
5057  COMPARE_FAD_VECTORS(B1, B3, m*n);
5058  COMPARE_FAD_VECTORS(B1, B4, m*n);
5059  COMPARE_FAD_VECTORS(B1, B5, m*n);
5060 
5061  for (unsigned int i=0; i<m; i++) {
5062  A[i*m+i].val() = 1.0;
5063  for (unsigned int k=0; k<ndot; k++)
5064  A[i*m+i].fastAccessDx(k) = 0.0;
5065  }
5067  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5069  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5071  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5073  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5075  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5076  COMPARE_FAD_VECTORS(B1, B2, m*n);
5077  COMPARE_FAD_VECTORS(B1, B3, m*n);
5078  COMPARE_FAD_VECTORS(B1, B4, m*n);
5079  COMPARE_FAD_VECTORS(B1, B5, m*n);
5080 }
5081 
5082 #undef COMPARE_VALUES
5083 #undef COMPARE_FADS
5084 #undef COMPARE_FAD_VECTORS
5085 
5086 #endif // FADBLASUNITTESTS_HPP
ScalarType DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy) const
expr val()
void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy, ScalarType *A, const OrdinalType lda) const
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
CPPUNIT_TEST(testSCAL1)
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Random< ScalarType > urand
Sacado::Fad::DFad< double > FadType
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const x_type *x, const OrdinalType incx, const beta_type beta, ScalarType *y, const OrdinalType incy) const
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
CPPUNIT_TEST_SUITE(FadBLASUnitTests)
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type *A, const OrdinalType lda, ScalarType *x, const OrdinalType incx) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
#define COMPARE_FADS(a, b)
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Sacado::Random< double > real_urand
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
Sacado::Fad::Vector< unsigned int, FadType > VectorType
#define A
Definition: Sacado_rad.hpp:572
void COPY(const OrdinalType n, const ScalarType *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
void AXPY(const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType *x, const OrdinalType incx) const
int n