Wednesday, February 19, 2014

convert oracle charactersets from US7ASCII to AL32UTF8

For historical reason, when one of the DSS system was upgrade from oracle 9i to 11g, the charactersets are kept as US7ASCII, which does not support french accent properly, this need to be corrected by converting the charactersets from US7ASCII to AL32UTF8.

I use metalink document Doc ID 260192.1 as a guide.
Changing the NLS_CHARACTERSET to AL32UTF8 / UTF8 (Unicode) in 8i, 9i , 10g and 11g (Doc ID 260192.1)

The document has a lot of details, I only document the one that related to my environment.

Note: I use csscan to find the lossy data, use DMU to convert the charactersets.

step 1:run csscan to generate the report.

csscan \"sys/syspassword as sysdba\" full=yes fromchar=us7ascii tochar=us7ascii log=dbcheck capture=y array=1000000 process=2

it took about 2 hours to run against 500GB database.

This step generate the dbcheck.txt, dbcheck.out, and dbcheck.err report.


step 2:  generate the lossy data query

select 'select '||column_name||' from '||owner_name||'.'||table_name||' where rowid='||''''||data_rowid||''';' from csmig.csmv$errors

sample output is:
select ADVADR2 from COLUMADM.TMP_IBMS_ADV_VW where rowid='AAA7f6AAGAABPTOAAM';

when I run this query, it shows the lossy data
ADVADR2
------------------------------
ST-LAZARRE, QU┐BEC

there are totally 563 lossy rows.

step 3: Fix the data

DMU does not allow me to run the charactersets conversion if the data issue is not fixed.

the lossy data can be fixed by running a DML like below:
update  owner.table_name set column_name='blabla' where rowid='blabla'

but we chose to use pl/sql script to deal with it.
here is the pl/sql code that we used to update the data.

create table tmp_tables_for_update
(
   user_id varchar2(100),
   table_id varchar2(100),
   column_id varchar2(100)
)
create table tmp_string (
str_record varchar2(500) )


declare 

 l_length number;
 l_symbol varchar2(10);
 l_code number;

cursor c_select is
select * from tmp_string;

begin

   for rec in c_select loop
   
      l_length := length(rec.str_record);
      for ind in 1..l_length loop
         l_symbol := substr(rec.str_record,ind,1);
         l_code := ascii(substr(rec.str_record,ind,1));    
         if l_code > 127 then
            
            dbms_output.put_line('Code:'||l_code||' Symbol:'||l_symbol); 
            
            insert into tmp_code values (l_code);
         end if;   
      end loop;
   end loop;
   
   commit;
   
EXCEPTION

  WHEN OTHERS THEN
    user_utility.print_line('Error code:'||SQLCODE);
    user_utility.print_line('Error message:'||SQLERRM);

end;
/

declare


l_count NUMBER := 0;

l_column_name VARCHAR2(10);
l_statement VARCHAR2(1000);
l_quote VARCHAR2(1) := '''';
l_palki VARCHAR2(3) := '||';

cursor c_table is
select distinct user_id,table_id,column_id
 from tmp_tables_for_update
 --where table_id = 'LOAD_CONTACTS'
-- and column_id = 'FIRST_NAME'
 ;

BEGIN

 for rec in c_table loop
    ---------------130-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(130),'||l_quote||'@1'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(130) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;

    ---------------144-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(144),'||l_quote||'@2'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(144) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;

     ---------------200-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(200),'||l_quote||'@3'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(200) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;

     ---------------201-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(201),'||l_quote||'@4'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(201) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;

     ---------------206-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(206),'||l_quote||'@5'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(206) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;



     ---------------212-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(212),'||l_quote||'@6'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(212) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;

     ---------------233-------------------------  
     l_statement := 'UPDATE '||rec.user_id||'.'||rec.table_id||
                    ' SET '||rec.column_id||' = REPLACE('||rec.column_id||',CHR(233),'||l_quote||'@7'||l_quote||')'||
                    ' WHERE '|| rec.column_id||' LIKE '||l_quote||'%'||l_quote||l_palki||' CHR(233) '||l_palki||l_quote||'%'||l_quote;
                    
   --   dbms_output.put_line('Statement:'|| l_statement);             

     execute immediate l_statement;
     commit;



 end loop;

   
EXCEPTION

  WHEN OTHERS THEN
    user_utility.print_line('Error code:'||SQLCODE);
    user_utility.print_line('Error message:'||SQLERRM);

   ROLLBACK;
   
END;
/

step 4: run DMU to convert charactersets


The DMU failed for some materialized view refresh, because they either are invalid or reference to non-exists objects.

I skip them and continue.

step 5.  Re-compile invalid objects and collect statistics.
@?/rdbms/admin/utlrp
EXEC DBMS_STATS.gather_database_stats;

After the charactersets is converted successfully, the application start to fail in the areas where they load data from database links from heterogeneous database, db2 and ms sqlserver.

For example, the long datatype on sqlserver is not described properly through database link, developer has to write a function to convert it to varchar in the query.

data overflow issues are fixed by change the table column from "byte" to "char", I use the following script to generate the 'alter table modify column varchar(n char)' script.

create table SEMANTICS$ as
 SELECT C.OWNER, C.TABLE_NAME, C.COLUMN_NAME, C.DATA_TYPE, C.CHAR_LENGTH
 FROM ALL_TAB_COLUMNS C, ALL_TABLES T
 WHERE C.OWNER = T.OWNER
 AND T.OWNER not in ('SYS','SYSTEM','XDB','SPPQOSSYS','OLAPSYS','MDSYS','FLOWS_FILES','APEX_030200', 'EXFSYS','CSMIG','WMSYS','ORDSYS','ORDDATA','TOAD','CTXSYS','OWBSYS','PERFSTAT','DSCSHE','OUTLN','DBSNMP','VIDEO4','DSCMGR10')
 AND C.TABLE_NAME = T.TABLE_NAME
 AND C.CHAR_USED = 'B'
 AND T.PARTITIONED != 'YES'
 AND C.TABLE_NAME NOT IN (SELECT TABLE_NAME FROM ALL_EXTERNAL_TABLES)
 AND C.DATA_TYPE IN ('VARCHAR2', 'CHAR')
/

set serveroutput on
set termout on
declare
cursor c1 is select * from semantics$;
v_statement varchar2(255);
v_nc number(10);
v_nt number(10);
begin
  execute immediate 'select count(*) from semantics$' into v_nc;
  execute immediate 'select count(distinct table_name) from semantics$' into v_nt;
  dbms_output.put_line ('ALTERing ' || v_nc || ' columns in ' || v_nt || ' tables');
  for r1 in c1 loop
    v_statement := 'ALTER TABLE "' || r1.owner || '"."' || r1.table_name;
    v_statement := v_statement || '" modify ("' || r1.column_name || '" ';
    v_statement := v_statement || r1.data_type || '(' || r1.char_length;
    v_statement := v_statement || ' CHAR));';
    dbms_output.put_line(v_statement);
    --execute immediate v_statement;
  end loop;
  dbms_output.put_line('Done');
end;
/ 


No comments:

Post a Comment