ShyLU  Version of the Day
shylu_schur.cpp
1 
2 //@HEADER
3 // ************************************************************************
4 //
5 // ShyLU: Hybrid preconditioner package
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 
43 
44 #include "shylu.h"
45 #include "shylu_util.h"
46 //#include "EpetraExt_Transpose_RowMatrix.h"
47 #include "Epetra_Vector.h"
48 #include "shylu_probing_operator.h"
49 #include "shylu_local_schur_operator.h"
50 #include <EpetraExt_Reindex_CrsMatrix.h>
51 
52 /* Apply an identity matrix to the Schur complement operator. Drop the entries
53  entries using a relative threshold. Assemble the result in a Crs Matrix
54  which will be our approximate Schur complement.
55  */
56 Teuchos::RCP<Epetra_CrsMatrix> computeApproxSchur(shylu_config *config,
57  shylu_symbolic *sym,
58  Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
59  Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
60  Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
61  Epetra_Map *localDRowMap)
62 {
63  double relative_thres = config->relative_threshold;
64  int nvectors = 16;
65 
66  ShyLU_Probing_Operator probeop(config, sym, G, R, LP, solver, ifSolver, C,
67  localDRowMap, nvectors);
68 
69  // Get row map
70  Epetra_Map rMap = G->RowMap();
71  int *rows = rMap.MyGlobalElements();
72  int totalElems = rMap.NumGlobalElements();
73  int localElems = rMap.NumMyElements();
74  //cout << " totalElems in Schur Complement" << totalElems << endl;
75  //cout << myPID << " localElems" << localElems << endl;
76 
77  // **************** Two collectives here *********************
78 #ifdef TIMING_OUTPUT
79  Teuchos::Time ftime("setup time");
80  ftime.start();
81 #endif
82  int prefixSum;
83  G->Comm().ScanSum(&localElems, &prefixSum, 1);
84  //cout << " prefixSum" << prefixSum << endl;
85  // Start the index in prefixSum-localElems
86  int *mySGID = new int[totalElems]; // vector of size Schur complement !
87  int *allSGID = new int[totalElems]; // vector of size Schur complement !
88  int i, j;
89  for (i = 0, j = 0; i < totalElems ; i++)
90  {
91  if (i >= prefixSum - localElems && i < prefixSum)
92  {
93  mySGID[i] = rows[j];
94  j++;
95  }
96  else
97  {
98  mySGID[i] = 0;
99  }
100  allSGID[i] = 0;
101  }
102 
103  C->Comm().SumAll(mySGID, allSGID, totalElems);
104 
105 #ifdef TIMING_OUTPUT
106  ftime.stop();
107  cout << "Time to Compute RowIDS" << ftime.totalElapsedTime() << endl;
108  ftime.reset();
109 #endif
110  // Now everyone knows the GIDs in the Schur complement
111 
112  //cout << rMap << endl;
113  j = 0;
114  Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
115  Copy, rMap, localElems));
116  int nentries;
117  double *values = new double[localElems]; // Need to adjust this for more
118  int *indices = new int[localElems]; // than one vector
119  double *vecvalues;
120  int dropped = 0;
121  double *maxvalue = new double[nvectors];
122 #ifdef TIMING_OUTPUT
123  ftime.start();
124 #endif
125 #ifdef TIMING_OUTPUT
126  Teuchos::Time app_time("Apply time");
127 #endif
128  int findex = totalElems / nvectors ;
129  for (i = 0 ; i < findex*nvectors ; i+=nvectors)
130  {
131  Epetra_MultiVector probevec(rMap, nvectors);
132  Epetra_MultiVector Scol(rMap, nvectors);
133 
134  probevec.PutScalar(0.0);
135  int cindex;
136  for (int k = 0; k < nvectors; k++)
137  {
138  cindex = k+i;
139  if (cindex >= prefixSum - localElems && cindex < prefixSum)
140  {
141  probevec.ReplaceGlobalValue(allSGID[cindex], k, 1.0);
142  }
143  }
144 
145 #ifdef TIMING_OUTPUT
146  app_time.start();
147 #endif
148  probeop.Apply(probevec, Scol);
149 #ifdef TIMING_OUTPUT
150  app_time.stop();
151 #endif
152  Scol.MaxValue(maxvalue);
153  for (int k = 0; k < nvectors; k++) //TODO:Need to switch these loops
154  {
155  cindex = k+i;
156  vecvalues = Scol[k];
157  //cout << "MAX" << maxvalue << endl;
158  for (j = 0 ; j < localElems ; j++)
159  {
160  nentries = 0; // inserting one entry in each row for now
161  if (allSGID[cindex] == rows[j]) // diagonal entry
162  {
163  values[nentries] = vecvalues[j];
164  indices[nentries] = allSGID[cindex];
165  nentries++;
166  Sbar->InsertGlobalValues(rows[j], nentries, values, indices);
167  }
168  else if (abs(vecvalues[j]/maxvalue[k]) > relative_thres)
169  {
170  values[nentries] = vecvalues[j];
171  indices[nentries] = allSGID[cindex];
172  nentries++;
173  Sbar->InsertGlobalValues(rows[j], nentries, values, indices);
174  }
175  else
176  {
177  if (vecvalues[j] != 0.0) dropped++;
178  }
179  }
180  }
181  }
182 
183  probeop.ResetTempVectors(1);
184 
185  for ( ; i < totalElems ; i++)
186  {
187  Epetra_MultiVector probevec(rMap, 1); // TODO: Try doing more than one
188  Epetra_MultiVector Scol(rMap, 1); // vector at a time
189 
190  probevec.PutScalar(0.0);
191  if (i >= prefixSum - localElems && i < prefixSum)
192  {
193  probevec.ReplaceGlobalValue(allSGID[i], 0, 1.0);
194  }
195 
196 #ifdef TIMING_OUTPUT
197  app_time.start();
198 #endif
199  probeop.Apply(probevec, Scol);
200 #ifdef TIMING_OUTPUT
201  app_time.stop();
202 #endif
203  vecvalues = Scol[0];
204  Scol.MaxValue(maxvalue);
205  //cout << "MAX" << maxvalue << endl;
206  for (j = 0 ; j < localElems ; j++)
207  {
208  nentries = 0; // inserting one entry in each row for now
209  if (allSGID[i] == rows[j]) // diagonal entry
210  {
211  values[nentries] = vecvalues[j];
212  indices[nentries] = allSGID[i];
213  nentries++;
214  Sbar->InsertGlobalValues(rows[j], nentries, values, indices);
215  }
216  else if (abs(vecvalues[j]/maxvalue[0]) > relative_thres)
217  {
218  values[nentries] = vecvalues[j];
219  indices[nentries] = allSGID[i];
220  nentries++;
221  Sbar->InsertGlobalValues(rows[j], nentries, values, indices);
222  }
223  else
224  {
225  if (vecvalues[j] != 0.0) dropped++;
226  }
227  }
228  }
229 #ifdef TIMING_OUTPUT
230  ftime.stop();
231  cout << "Time in finding and dropping entries" << ftime.totalElapsedTime() << endl;
232  ftime.reset();
233 #endif
234 #ifdef TIMING_OUTPUT
235  cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
236 #endif
237  Sbar->FillComplete();
238  cout << "#dropped entries" << dropped << endl;
239  delete[] allSGID;
240  delete[] mySGID;
241  delete[] values;
242  delete[] indices;
243  delete[] maxvalue;
244 
245  return Sbar;
246 }
247 
248 /* Computes the approximate Schur complement for the wide separator */
249 Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
250  shylu_symbolic *ssym, // symbolic structure
251  Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
252  Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
253  Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
254  Epetra_Map *localDRowMap)
255 {
256  int i;
257  double relative_thres = config->relative_threshold;
258 
259  // Need to create local G (block diagonal portion) , R, C
260 
261  // Get row map of G
262  //Epetra_Map CrMap = C->RowMap();
263  //int *c_rows = CrMap.MyGlobalElements();
264  //int *c_cols = (C->ColMap()).MyGlobalElements();
265  //int c_totalElems = CrMap.NumGlobalElements();
266  //int c_localElems = CrMap.NumMyElements();
267  //int c_localcolElems = (C->ColMap()).NumMyElements();
268 
269  Epetra_Map GrMap = G->RowMap();
270  int *g_rows = GrMap.MyGlobalElements();
271  //int g_totalElems = GrMap.NumGlobalElements();
272  int g_localElems = GrMap.NumMyElements();
273 
274  //Epetra_Map RrMap = R->RowMap();
275  //int *r_rows = RrMap.MyGlobalElements();
276  //int *r_cols = (R->ColMap()).MyGlobalElements();
277  //int r_totalElems = RrMap.NumGlobalElements();
278  //int r_localElems = RrMap.NumMyElements();
279  //int r_localcolElems = (R->ColMap()).NumMyElements();
280 
281  Epetra_SerialComm LComm;
282  Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);
283 
284  int nentries1, gid;
285  // maxentries is the maximum of all three possible matrices as the arrays
286  // are reused between the three
287  int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
288  maxentries = max(maxentries, G->MaxNumEntries());
289 
290  double *values1 = new double[maxentries];
291  double *values2 = new double[maxentries];
292  double *values3 = new double[maxentries];
293  int *indices1 = new int[maxentries];
294  int *indices2 = new int[maxentries];
295  int *indices3 = new int[maxentries];
296 
297  // Sbar - Approximate Schur complement
298  Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
299  Copy, GrMap, g_localElems));
300 
301  // Include only the block diagonal elements of G in localG
302  Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
303  int cnt, scnt;
304  for (i = 0; i < g_localElems ; i++)
305  {
306  gid = g_rows[i];
307  G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);
308 
309  cnt = 0;
310  scnt = 0;
311  for (int j = 0 ; j < nentries1 ; j++)
312  {
313  if (G->LRID(indices1[j]) != -1)
314  {
315  values2[cnt] = values1[j];
316  indices2[cnt++] = indices1[j];
317  }
318  else
319  {
320  // Add it to Sbar immediately
321  values3[scnt] = values1[j];
322  indices3[scnt++] = indices1[j];
323  }
324  }
325 
326  localG.InsertGlobalValues(gid, cnt, values2, indices2);
327  Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
328  }
329  localG.FillComplete();
330  //cout << "Created local G matrix" << endl;
331 
332  int nvectors = 16;
333  /*ShyLU_Probing_Operator probeop(&localG, &localR, LP, solver, &localC,
334  localDRowMap, nvectors);*/
335  ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
336  ifSolver, C, localDRowMap, nvectors);
337 
338 #ifdef DUMP_MATRICES
339  //ostringstream fnamestr;
340  //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
341  //string Cfname = fnamestr.str();
342  //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);
343 
344  //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
345  //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
346  //new EpetraExt::CrsMatrix_Reindex( defMapg );
347  //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
348  //ReIdx_MatTransg->fwd();
349  //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
350 #endif
351 
352  //cout << " totalElems in Schur Complement" << totalElems << endl;
353  //cout << myPID << " localElems" << localElems << endl;
354 
355  // **************** Two collectives here *********************
356 #ifdef TIMING_OUTPUT
357  Teuchos::Time ftime("setup time");
358  ftime.start();
359 #endif
360 #ifdef TIMING_OUTPUT
361  Teuchos::Time app_time("setup time");
362 #endif
363 
364  int nentries;
365  // size > maxentries as there could be fill
366  // TODO: Currently the size of the two arrays can be one, Even if we switch
367  // the loop below the size of the array required is nvectors. Fix it
368  double *values = new double[nvectors];
369  int *indices = new int[nvectors];
370  double *vecvalues;
371 #ifdef SHYLU_DEBUG
372  // mfh 25 May 2015: Don't declare this variable if it's not used.
373  // It's only used if SHYLU_DEBUG is defined.
374  int dropped = 0;
375 #endif // SHYLU_DEBUG
376  double *maxvalue = new double[nvectors];
377 #ifdef TIMING_OUTPUT
378  ftime.start();
379 #endif
380  int findex = g_localElems / nvectors ;
381 
382  int cindex;
383  // int mypid = C->Comm().MyPID(); // unused
384  Epetra_MultiVector probevec (G_localRMap, nvectors);
385  Epetra_MultiVector Scol (G_localRMap, nvectors);
386  probevec.PutScalar(0.0);
387  for (i = 0 ; i < findex*nvectors ; i+=nvectors)
388  {
389  // Set the probevec to find block columns of S.
390  for (int k = 0; k < nvectors; k++)
391  {
392  cindex = k+i;
393  // TODO: Can do better than this, just need to go to the column map
394  // of C, there might be null columns in C
395  probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
396  //if (mypid == 0)
397  //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
398  }
399 
400 #ifdef TIMING_OUTPUT
401  app_time.start();
402 #endif
403  probeop.Apply(probevec, Scol);
404 #ifdef TIMING_OUTPUT
405  app_time.stop();
406 #endif
407 
408  // Reset the probevec to all zeros.
409  for (int k = 0; k < nvectors; k++)
410  {
411  cindex = k+i;
412  probevec.ReplaceGlobalValue(g_rows[cindex], k, 0.0);
413  }
414 
415  Scol.MaxValue(maxvalue);
416  nentries = 0;
417  for (int j = 0 ; j < g_localElems ; j++)
418  {
419  for (int k = 0; k < nvectors; k++)
420  {
421  cindex = k+i;
422  vecvalues = Scol[k];
423  if ((g_rows[cindex] == g_rows[j]) ||
424  (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
425  // diagonal entry or large entry.
426  {
427  values[nentries] = vecvalues[j];
428  indices[nentries++] = g_rows[cindex];
429  }
430 #ifdef SHYLU_DEBUG
431  else if (vecvalues[j] != 0.0)
432  {
433  dropped++;
434  }
435 #endif // SHYLU_DEBUG
436  }
437  Sbar->InsertGlobalValues(g_rows[j], nentries, values,
438  indices);
439  nentries = 0;
440  }
441  }
442 
443  if (i < g_localElems)
444  {
445  nvectors = g_localElems - i;
446  probeop.ResetTempVectors(nvectors);
447  Epetra_MultiVector probevec1 (G_localRMap, nvectors);
448  Epetra_MultiVector Scol1 (G_localRMap, nvectors);
449 
450  probevec1.PutScalar(0.0);
451  for (int k = 0; k < nvectors; k++)
452  {
453  cindex = k+i;
454  // TODO: Can do better than this, just need to go to the column map
455  // of C, there might be null columns in C
456  probevec1.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
457  }
458 
459 #ifdef TIMING_OUTPUT
460  app_time.start();
461 #endif
462  probeop.Apply(probevec1, Scol1);
463 #ifdef TIMING_OUTPUT
464  app_time.stop();
465 #endif
466  Scol1.MaxValue(maxvalue);
467  nentries = 0;
468  for (int j = 0 ; j < g_localElems ; j++)
469  {
470  //cout << "MAX" << maxvalue << endl;
471  for (int k = 0; k < nvectors; k++)
472  {
473  cindex = k+i;
474  vecvalues = Scol1[k];
475  //nentries = 0; // inserting one entry in each row for now
476  if ((g_rows[cindex] == g_rows[j]) ||
477  (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
478  // diagonal entry or large entry.
479  {
480  values[nentries] = vecvalues[j];
481  indices[nentries++] = g_rows[cindex];
482  }
483 #ifdef SHYLU_DEBUG
484  else if (vecvalues[j] != 0.0)
485  {
486  dropped++;
487  }
488 #endif // SHYLU_DEBUG
489  }
490  Sbar->InsertGlobalValues(g_rows[j], nentries, values,
491  indices);
492  nentries = 0;
493  }
494  }
495 
496 #ifdef TIMING_OUTPUT
497  ftime.stop();
498  cout << "Time in finding and dropping entries" << ftime.totalElapsedTime()
499  << endl;
500  ftime.reset();
501  cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
502  probeop.PrintTimingInfo();
503 #endif
504  Sbar->FillComplete();
505 
506 #ifdef DUMP_MATRICES
507  Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
508  EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
509  new EpetraExt::CrsMatrix_Reindex( defMap2 );
510  Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
511  ReIdx_MatTrans2->fwd();
512  EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
513 #endif
514 
515 #ifdef SHYLU_DEBUG
516  cout << "#dropped entries" << dropped << endl;
517 #endif
518  delete[] values;
519  delete[] indices;
520  delete[] values1;
521  delete[] indices1;
522  delete[] values2;
523  delete[] indices2;
524  delete[] values3;
525  delete[] indices3;
526  delete[] maxvalue;
527 
528  return Sbar;
529 }
530 
531 /* Computes the approximate Schur complement for the wide separator
532  using guided probing*/
533 Teuchos::RCP<Epetra_CrsMatrix> computeSchur_GuidedProbing
534 (
535  shylu_config *config,
536  shylu_symbolic *ssym, // symbolic structure
537  shylu_data *data, // numeric structure
538  Epetra_Map *localDRowMap
539 )
540 {
541  int i;
542  double relative_thres = config->relative_threshold;
543 
544  Epetra_CrsMatrix *G = ssym->G.getRawPtr();
545  Epetra_CrsMatrix *R = ssym->R.getRawPtr();
546  Epetra_LinearProblem *LP = ssym->LP.getRawPtr();
547  Amesos_BaseSolver *solver = ssym->Solver.getRawPtr();
548  Ifpack_Preconditioner *ifSolver = ssym->ifSolver.getRawPtr();
549  Epetra_CrsMatrix *C = ssym->C.getRawPtr();
550 
551  // Need to create local G (block diagonal portion) , R, C
552 
553  // Get row map of G
554  Epetra_Map CrMap = C->RowMap();
555  int *c_rows = CrMap.MyGlobalElements();
556  int *c_cols = (C->ColMap()).MyGlobalElements();
557  //int c_totalElems = CrMap.NumGlobalElements();
558  int c_localElems = CrMap.NumMyElements();
559  int c_localcolElems = (C->ColMap()).NumMyElements();
560 
561  Epetra_Map GrMap = G->RowMap();
562  int *g_rows = GrMap.MyGlobalElements();
563  //int g_totalElems = GrMap.NumGlobalElements();
564  int g_localElems = GrMap.NumMyElements();
565 
566  Epetra_Map RrMap = R->RowMap();
567  int *r_rows = RrMap.MyGlobalElements();
568  int *r_cols = (R->ColMap()).MyGlobalElements();
569  //int r_totalElems = RrMap.NumGlobalElements();
570  int r_localElems = RrMap.NumMyElements();
571  int r_localcolElems = (R->ColMap()).NumMyElements();
572 
573  Epetra_SerialComm LComm;
574  Epetra_Map C_localRMap (-1, c_localElems, c_rows, 0, LComm);
575  Epetra_Map C_localCMap (-1, c_localcolElems, c_cols, 0, LComm);
576  Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);
577  Epetra_Map R_localRMap (-1, r_localElems, r_rows, 0, LComm);
578  Epetra_Map R_localCMap (-1, r_localcolElems, r_cols, 0, LComm);
579 
580  //cout << "#local rows" << g_localElems << "#non zero local cols" << c_localcolElems << endl;
581 
582 #ifdef DEBUG
583  cout << "DEBUG MODE" << endl;
584  int nrows = C->RowMap().NumMyElements();
585  assert(nrows == localDRowMap->NumGlobalElements());
586 
587  int gids[nrows], gids1[nrows];
588  C_localRMap.MyGlobalElements(gids);
589  localDRowMap->MyGlobalElements(gids1);
590  cout << "Comparing R's domain map with D's row map" << endl;
591 
592  for (int i = 0; i < nrows; i++)
593  {
594  assert(gids[i] == gids1[i]);
595  }
596 #endif
597 
598  int nentries1, gid;
599  // maxentries is the maximum of all three possible matrices as the arrays
600  // are reused between the three
601  int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
602  maxentries = max(maxentries, G->MaxNumEntries());
603 
604  double *values1 = new double[maxentries];
605  double *values2 = new double[maxentries];
606  double *values3 = new double[maxentries];
607  int *indices1 = new int[maxentries];
608  int *indices2 = new int[maxentries];
609  int *indices3 = new int[maxentries];
610 
611  //cout << "Creating local matrices" << endl;
612  int err;
613  Epetra_CrsMatrix localC(Copy, C_localRMap, C->MaxNumEntries(), false);
614  for (i = 0; i < c_localElems ; i++)
615  {
616  gid = c_rows[i];
617  err = C->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1,
618  indices1);
619  assert (err == 0);
620  //if (nentries1 > 0) // TODO: Later
621  //{
622  err = localC.InsertGlobalValues(gid, nentries1, values1, indices1);
623  assert (err == 0);
624  //}
625  }
626  localC.FillComplete(G_localRMap, C_localRMap);
627 
628  //cout << "Created local C matrix" << endl;
629 
630  Epetra_CrsMatrix localR(Copy, R_localRMap, R->MaxNumEntries(), false);
631  for (i = 0; i < r_localElems ; i++)
632  {
633  gid = r_rows[i];
634  R->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);
635  localR.InsertGlobalValues(gid, nentries1, values1, indices1);
636  }
637  localR.FillComplete(*localDRowMap, R_localRMap);
638  //cout << "Created local R matrix" << endl;
639 
640  // Sbar - Approximate Schur complement
641  Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
642  Copy, GrMap, g_localElems));
643 
644  // Include only the block diagonal elements of G in localG
645  Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
646  int cnt, scnt;
647  for (i = 0; i < g_localElems ; i++)
648  {
649  gid = g_rows[i];
650  G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);
651 
652  cnt = 0;
653  scnt = 0;
654  for (int j = 0 ; j < nentries1 ; j++)
655  {
656  if (G->LRID(indices1[j]) != -1)
657  {
658  values2[cnt] = values1[j];
659  indices2[cnt++] = indices1[j];
660  }
661  else
662  {
663  // Add it to Sbar immediately
664  values3[scnt] = values1[j];
665  indices3[scnt++] = indices1[j];
666  }
667  }
668 
669  localG.InsertGlobalValues(gid, cnt, values2, indices2);
670  Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
671  }
672  localG.FillComplete();
673  cout << "Created local G matrix" << endl;
674 
675  int nvectors = 16;
676  ShyLU_Probing_Operator probeop(config, ssym, &localG, &localR, LP, solver,
677  ifSolver, &localC, localDRowMap, nvectors);
678 
679 #ifdef DUMP_MATRICES
680  //ostringstream fnamestr;
681  //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
682  //string Cfname = fnamestr.str();
683  //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);
684 
685  //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
686  //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
687  //new EpetraExt::CrsMatrix_Reindex( defMapg );
688  //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
689  //ReIdx_MatTransg->fwd();
690  //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
691 #endif
692 
693  //cout << " totalElems in Schur Complement" << totalElems << endl;
694  //cout << myPID << " localElems" << localElems << endl;
695 
696  // **************** Two collectives here *********************
697 #ifdef TIMING_OUTPUT
698  Teuchos::Time ftime("setup time");
699 #endif
700 #ifdef TIMING_OUTPUT
701  Teuchos::Time app_time("setup time");
702 #endif
703 
704  Teuchos::RCP<Epetra_CrsGraph> lSGraph = Teuchos::RCP<Epetra_CrsGraph> (
705  new Epetra_CrsGraph(Copy, G_localRMap, maxentries));
706 
707  if (data->num_compute % config->reset_iter == 0)
708  {
709  int nentries;
710  // size > maxentries as there could be fill
711  // TODO: Currently the size of the two arrays can be one, Even if we switch
712  // the loop below the size of the array required is nvectors. Fix it
713  double *values = new double[g_localElems];
714  int *indices = new int[g_localElems];
715  double *vecvalues;
716  int dropped = 0;
717  double *maxvalue = new double[nvectors];
718 #ifdef TIMING_OUTPUT
719  ftime.start();
720 #endif
721  int findex = g_localElems / nvectors ;
722 
723  int cindex;
724  // int mypid = C->Comm().MyPID(); // unused
725  Epetra_MultiVector probevec(G_localRMap, nvectors);
726  Epetra_MultiVector Scol(G_localRMap, nvectors);
727  for (i = 0 ; i < findex*nvectors ; i+=nvectors)
728  {
729  probevec.PutScalar(0.0); // TODO: Move it out
730  for (int k = 0; k < nvectors; k++)
731  {
732  cindex = k+i;
733  // TODO: Can do better than this, just need to go to the column map
734  // of C, there might be null columns in C
735  // Not much of use for Shasta 2x2 .. Later.
736  probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
737  //if (mypid == 0)
738  //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
739  }
740 
741 #ifdef TIMING_OUTPUT
742  app_time.start();
743 #endif
744  probeop.Apply(probevec, Scol);
745 #ifdef TIMING_OUTPUT
746  app_time.stop();
747 #endif
748 
749  Scol.MaxValue(maxvalue);
750  for (int k = 0; k < nvectors; k++) //TODO:Need to switch these loops
751  {
752  cindex = k+i;
753  vecvalues = Scol[k];
754  //cout << "MAX" << maxvalue << endl;
755  for (int j = 0 ; j < g_localElems ; j++)
756  {
757  nentries = 0; // inserting one entry in each row for now
758  if (g_rows[cindex] == g_rows[j]) // diagonal entry
759  {
760  values[nentries] = vecvalues[j];
761  indices[nentries] = g_rows[cindex];
762  nentries++;
763  err = Sbar->InsertGlobalValues(g_rows[j], nentries, values,
764  indices);
765  assert(err >= 0);
766  err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
767  indices);
768  assert(err >= 0);
769  }
770  else if (abs(vecvalues[j]/maxvalue[k]) > relative_thres)
771  {
772  values[nentries] = vecvalues[j];
773  indices[nentries] = g_rows[cindex];
774  nentries++;
775  err = Sbar->InsertGlobalValues(g_rows[j], nentries, values,
776  indices);
777  assert(err >= 0);
778  err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
779  indices);
780  assert(err >= 0);
781  }
782  else
783  {
784  if (vecvalues[j] != 0.0)
785  {
786  dropped++;
787  //cout << "vecvalues[j]" << vecvalues[j] <<
788  // " max" << maxvalue[k] << endl;
789  }
790  }
791  }
792  }
793  }
794 
795  probeop.ResetTempVectors(1);
796 
797  for ( ; i < g_localElems ; i++)
798  {
799  // TODO: Can move the next two decalarations outside the loop
800  Epetra_MultiVector probevec(G_localRMap, 1);
801  Epetra_MultiVector Scol(G_localRMap, 1);
802 
803  probevec.PutScalar(0.0);
804  // TODO: Can do better than this, just need to go to the column map
805  // of C, there might be null columns in C
806  probevec.ReplaceGlobalValue(g_rows[i], 0, 1.0);
807 
808 #ifdef TIMING_OUTPUT
809  app_time.start();
810 #endif
811  probeop.Apply(probevec, Scol);
812 #ifdef TIMING_OUTPUT
813  app_time.stop();
814 #endif
815  vecvalues = Scol[0];
816  Scol.MaxValue(maxvalue);
817  //cout << "MAX" << maxvalue << endl;
818  for (int j = 0 ; j < g_localElems ; j++)
819  {
820  nentries = 0; // inserting one entry in each row for now
821  if (g_rows[i] == g_rows[j]) // diagonal entry
822  {
823  values[nentries] = vecvalues[j];
824  indices[nentries] = g_rows[i];
825  nentries++;
826  err = Sbar->InsertGlobalValues(g_rows[j], nentries, values, indices);
827  assert(err >= 0);
828  err = lSGraph->InsertGlobalIndices(g_rows[j], nentries, indices);
829  assert(err >= 0);
830  }
831  else if (abs(vecvalues[j]/maxvalue[0]) > relative_thres)
832  {
833  values[nentries] = vecvalues[j];
834  indices[nentries] = g_rows[i];
835  nentries++;
836  err = Sbar->InsertGlobalValues(g_rows[j], nentries, values, indices);
837  assert(err >= 0);
838  err = lSGraph->InsertGlobalIndices(g_rows[j], nentries, indices);
839  assert(err >= 0);
840  }
841  else
842  {
843  if (vecvalues[j] != 0.0) dropped++;
844  }
845  }
846  }
847 
848 #ifdef TIMING_OUTPUT
849  ftime.stop();
850  cout << "Time in finding and dropping entries" << ftime.totalElapsedTime() << endl;
851  ftime.reset();
852 #endif
853 #ifdef TIMING_OUTPUT
854  cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
855 #endif
856  probeop.PrintTimingInfo();
857  Sbar->FillComplete();
858  lSGraph->FillComplete();
859 
860  data->localSbargraph = lSGraph;
861 
862 #ifdef DUMP_MATRICES
863  Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
864  EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
865  new EpetraExt::CrsMatrix_Reindex( defMap2 );
866  Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
867  ReIdx_MatTrans2->fwd();
868  EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
869 #endif
870 
871  cout << "#dropped entries" << dropped << endl;
872  delete[] values;
873  delete[] indices;
874  delete[] maxvalue;
875  }
876  else
877  {
878  if (((data->num_compute-1) % config->reset_iter) == 0)
879  {
880  // We recomputed the Schur complement with dropping for the last
881  // compute. Reset the prober with the new orthogonal vectors for
882  // the Sbar from the previous iteration.
883  Teuchos::ParameterList pList;
884  Teuchos::RCP<Isorropia::Epetra::Prober> gprober =
885  Teuchos::RCP<Isorropia::Epetra::Prober> (new
886  Isorropia::Epetra::Prober(
887  data->localSbargraph.getRawPtr(), pList, false));
888  gprober->color();
889  data->guided_prober = gprober;
890 
891  }
892  // Use the prober to probe the probeop for the sparsity pattern
893  // add that to Sbar and call Fill complete
894  int nvectors = data->guided_prober->getNumOrthogonalVectors();
895  cout << "Number of Orthogonal Vectors for guided probing" << nvectors
896  << endl;
897 
898  probeop.ResetTempVectors(nvectors);
899  Teuchos::RCP<Epetra_CrsMatrix> blockdiag_Sbar =
900  data->guided_prober->probe(probeop);
901  int maxentries = blockdiag_Sbar->GlobalMaxNumEntries();
902  int *indices = new int[maxentries];
903  double *values = new double[maxentries];
904 
905  int numentries;
906  for (int i = 0; i < blockdiag_Sbar->NumGlobalRows() ; i++)
907  {
908  int gid = blockdiag_Sbar->GRID(i);
909  blockdiag_Sbar->ExtractGlobalRowCopy(gid, maxentries, numentries,
910  values, indices);
911  Sbar->InsertGlobalValues(gid, numentries, values, indices);
912  }
913 
914  Sbar->FillComplete();
915  delete[] indices;
916  delete[] values;
917  }
918 
919  delete[] values1;
920  delete[] indices1;
921  delete[] values2;
922  delete[] indices2;
923  delete[] values3;
924  delete[] indices3;
925  return Sbar;
926 }
Teuchos::RCP< Epetra_CrsMatrix > computeSchur_GuidedProbing(shylu_config *config, shylu_symbolic *ssym, shylu_data *data, Epetra_Map *localDRowMap)
Compute an approximate Schur Complement using the option of Guided Probing.
Teuchos::RCP< Epetra_CrsMatrix > computeApproxSchur(shylu_config *config, shylu_symbolic *ssym, Epetra_CrsMatrix *G, Epetra_CrsMatrix *R, Epetra_LinearProblem *LP, Amesos_BaseSolver *solver, Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C, Epetra_Map *localDRowMap)
Compute an approximate Schur Complement (Narrow Sep)
Definition: shylu_schur.cpp:56
Teuchos::RCP< Epetra_CrsMatrix > computeApproxWideSchur(shylu_config *config, shylu_symbolic *ssym, Epetra_CrsMatrix *G, Epetra_CrsMatrix *R, Epetra_LinearProblem *LP, Amesos_BaseSolver *solver, Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C, Epetra_Map *localDRowMap)
Compute an approximate Shur Complete (Wide Sep)
Utilities for ShyLU.
Main data structure holding needed offset and temp variables.
Definition: shylu.h:108
Main header file of ShyLU (Include main user calls)