/[escript]/trunk/paso/src/Pattern_unrollBlocks.c
ViewVC logotype

Contents of /trunk/paso/src/Pattern_unrollBlocks.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1736 - (show annotations)
Fri Aug 29 02:23:16 2008 UTC (10 years, 10 months ago) by gross
File MIME type: text/plain
File size: 4488 byte(s)
This fixes a problem which is typically arising when using reduced order
with MPI and a "small" number of elements per processor. In this case it
can happen that the couple matrix is not using all entries sent to the
processor. The old implementations assumed that the indices will cover
the entire input. This assumption has been removed.


1
2 /* $Id: Pattern_unrollBlocks.c 1306 2007-09-18 05:51:09Z ksteube $ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /**************************************************************/
17
18 /* Paso: Pattern_unrollBlocks */
19
20 /**************************************************************/
21
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Pattern.h"
28
29 /**************************************************************/
30
31 /* creates Pattern */
32
33 Paso_Pattern* Paso_Pattern_unrollBlocks(Paso_Pattern* pattern, \
34 int type, dim_t output_block_size,dim_t input_block_size) {
35 Paso_Pattern*out=NULL;
36 index_t *ptr=NULL,*index=NULL,iPtr;
37 dim_t i,j,k, block_size, new_len, new_numOutput, new_numInput;
38 index_t index_offset_in=(pattern->type & PATTERN_FORMAT_OFFSET1 ? 1:0);
39 index_t index_offset_out=(type & PATTERN_FORMAT_OFFSET1 ? 1:0);
40
41 Paso_resetError();
42 if ((pattern->type & PATTERN_FORMAT_SYM) != (type & PATTERN_FORMAT_SYM)) {
43 Paso_setError(TYPE_ERROR,"Paso_Pattern_unrollBlocks: conversion between symmetric and non-symmetric is not implemented yet");
44 return NULL;
45 }
46 if (( (pattern->type & PATTERN_FORMAT_OFFSET1) == (type & PATTERN_FORMAT_OFFSET1)) &&
47 (pattern->input_block_size == input_block_size) &&
48 (pattern->output_block_size == output_block_size)) {
49
50 out = Paso_Pattern_getReference(pattern);
51 } else {
52 if ( ( (pattern->input_block_size >1) && (input_block_size != pattern->input_block_size) ) ||
53 ( (pattern->output_block_size >1) && (output_block_size != pattern->output_block_size) ) ) {
54
55 Paso_setError(TYPE_ERROR,"Paso_Pattern_unrollBlocks: unrolling requires matching block sizes or block size one for input pattern.");
56 return NULL;
57 }
58 printf("Information: matrix pattern is unrolled to block size %d x %d with offset %d.\n",output_block_size,input_block_size,index_offset_out);
59 block_size=output_block_size*input_block_size;
60 new_len=(pattern->len)*block_size;
61 new_numOutput=(pattern->numOutput)*output_block_size;
62 new_numInput=(pattern->numInput)*input_block_size;
63
64 ptr=MEMALLOC(new_numOutput+1,index_t);
65 index=MEMALLOC(new_len,index_t);
66 if (! ( Paso_checkPtr(ptr) || Paso_checkPtr(index) ) ) {
67 #pragma omp parallel
68 {
69 #pragma omp for private(i) schedule(static)
70 for (i=0;i<new_numOutput+1;++i) ptr[i]=index_offset_out;
71
72 #pragma omp single
73 ptr[new_numOutput]=new_len+index_offset_out;
74
75 #pragma omp for private(i,k) schedule(static)
76 for (i=0;i<pattern->numOutput;++i)
77 for (k=0;k<output_block_size;++k) ptr[i*output_block_size+k]=(pattern->ptr[i]-index_offset_in)*block_size+(pattern->ptr[i+1]-pattern->ptr[i])*input_block_size*k+index_offset_out;
78
79 #pragma omp for private(i,iPtr) schedule(static)
80 for (i=0;i<new_numOutput;++i) {
81 #pragma ivdep
82 for (iPtr=ptr[i]-index_offset_out;iPtr<ptr[i+1]-index_offset_out;++iPtr) index[iPtr]=index_offset_out;
83 }
84
85 #pragma omp for private(i,j,iPtr,k) schedule(static)
86 for (i=0;i<pattern->numOutput;++i) {
87 for (iPtr=pattern->ptr[i]-index_offset_in;iPtr<pattern->ptr[i+1]-index_offset_in;++iPtr) {
88 for (k=0;k<output_block_size;++k) {
89 #pragma ivdep
90 for (j=0;j<input_block_size;++j) {
91 index[ptr[i*output_block_size+k]-index_offset_out+(iPtr-(pattern->ptr[i]-index_offset_in))*input_block_size+j]=(pattern->index[iPtr]-index_offset_in)*input_block_size+j+index_offset_out;
92 }
93 }
94 }
95 }
96 }
97 out=Paso_Pattern_alloc(type,pattern->input_block_size * input_block_size,pattern->output_block_size * output_block_size,new_numOutput,new_numInput,ptr,index);
98 }
99 if (! Paso_noError()) {
100 MEMFREE(index);
101 MEMFREE(ptr);
102 }
103 }
104 return out;
105 }

  ViewVC Help
Powered by ViewVC 1.1.26