/[escript]/trunk/paso/src/Pattern_unrollBlocks.c
ViewVC logotype

Annotation of /trunk/paso/src/Pattern_unrollBlocks.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1556 - (hide annotations)
Mon May 12 00:54:58 2008 UTC (11 years, 9 months ago) by gross
File MIME type: text/plain
File size: 4418 byte(s)
Modification to allow mixed mode execution. 
In order to keep the code portable accross platform all MPI calls within
parallel regions have been moved. 


1 ksteube 1315
2     /* $Id: Pattern_unrollBlocks.c 1306 2007-09-18 05:51:09Z ksteube $ */
3    
4     /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15    
16     /**************************************************************/
17    
18     /* Paso: Pattern_unrollBlocks */
19    
20     /**************************************************************/
21    
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Pattern.h"
28    
29     /**************************************************************/
30    
31     /* creates Pattern */
32    
33     Paso_Pattern* Paso_Pattern_unrollBlocks(Paso_Pattern* pattern, \
34     int type, dim_t output_block_size,dim_t input_block_size) {
35     Paso_Pattern*out=NULL;
36     index_t *ptr=NULL,*index=NULL,iPtr;
37     dim_t i,j,k,l, block_size, new_n_ptr, new_len, new_numOutput;
38     index_t index_offset_in=(pattern->type & PATTERN_FORMAT_OFFSET1 ? 1:0);
39     index_t index_offset_out=(type & PATTERN_FORMAT_OFFSET1 ? 1:0);
40    
41     Paso_resetError();
42     if ((pattern->type & PATTERN_FORMAT_SYM) != (type & PATTERN_FORMAT_SYM)) {
43     Paso_setError(TYPE_ERROR,"Paso_Pattern_unrollBlocks: conversion between symmetric and non-symmetric is not implemented yet");
44     return NULL;
45     }
46     if (( (pattern->type & PATTERN_FORMAT_OFFSET1) == (type & PATTERN_FORMAT_OFFSET1)) &&
47     (pattern->input_block_size == input_block_size) &&
48     (pattern->output_block_size == output_block_size)) {
49    
50     out = Paso_Pattern_getReference(pattern);
51     } else {
52     if ( ( (pattern->input_block_size >1) && (input_block_size != pattern->input_block_size) ) ||
53     ( (pattern->output_block_size >1) && (output_block_size != pattern->output_block_size) ) ) {
54    
55     Paso_setError(TYPE_ERROR,"Paso_Pattern_unrollBlocks: unrolling requires matching block sizes or block size one for input pattern.");
56     return NULL;
57     }
58     printf("Information: matrix pattern is unrolled to block size %d x %d with offset %d.\n",output_block_size,input_block_size,index_offset_out);
59     block_size=output_block_size*input_block_size;
60     new_len=(pattern->len)*block_size;
61     new_numOutput=(pattern->numOutput)*output_block_size;
62    
63     ptr=MEMALLOC(new_numOutput+1,index_t);
64     index=MEMALLOC(new_len,index_t);
65     if (! ( Paso_checkPtr(ptr) || Paso_checkPtr(index) ) ) {
66     #pragma omp parallel
67     {
68     #pragma omp for private(i) schedule(static)
69     for (i=0;i<new_numOutput+1;++i) ptr[i]=index_offset_out;
70    
71 gross 1556 #pragma omp single
72 ksteube 1315 ptr[new_numOutput]=new_len+index_offset_out;
73    
74     #pragma omp for private(i,k) schedule(static)
75     for (i=0;i<pattern->numOutput;++i)
76     for (k=0;k<output_block_size;++k) ptr[i*output_block_size+k]=(pattern->ptr[i]-index_offset_in)*block_size+(pattern->ptr[i+1]-pattern->ptr[i])*input_block_size*k+index_offset_out;
77    
78     #pragma omp for private(i,iPtr) schedule(static)
79     for (i=0;i<new_numOutput;++i) {
80     #pragma ivdep
81     for (iPtr=ptr[i]-index_offset_out;iPtr<ptr[i+1]-index_offset_out;++iPtr) index[iPtr]=index_offset_out;
82     }
83    
84     #pragma omp for private(i,j,iPtr,k) schedule(static)
85     for (i=0;i<pattern->numOutput;++i) {
86     for (iPtr=pattern->ptr[i]-index_offset_in;iPtr<pattern->ptr[i+1]-index_offset_in;++iPtr) {
87     for (k=0;k<output_block_size;++k) {
88     #pragma ivdep
89     for (j=0;j<input_block_size;++j) {
90     index[ptr[i*output_block_size+k]-index_offset_out+(iPtr-(pattern->ptr[i]-index_offset_in))*input_block_size+j]=(pattern->index[iPtr]-index_offset_in)*input_block_size+j+index_offset_out;
91     }
92     }
93     }
94     }
95     }
96     out=Paso_Pattern_alloc(type,pattern->input_block_size * input_block_size,pattern->output_block_size * output_block_size,new_numOutput,ptr,index);
97     }
98     if (! Paso_noError()) {
99     MEMFREE(index);
100     MEMFREE(ptr);
101     }
102     }
103     return out;
104     }

  ViewVC Help
Powered by ViewVC 1.1.26