将一个 50661 个字符的 XML 文档加载到 Oracle XMLTABLE 中时,它似乎会在位置 40000 处丢弃一个单独的空格字符,但仍会继续处理 XML 文档的其余部分。这是为什么?
我提供了一个简单的测试脚本,用于在 Oracle Database 11g 企业版 11.2.0.4.0(64 位生产版)中演示此问题:
-- Object type holding the metadata of one <column> element; the anonymous test
-- block builds one instance per row returned by its '/columns/column' XMLTABLE.
CREATE OR REPLACE TYPE SPCBH_V_COLUMNS_OBJECT AS OBJECT
(
    COLUMN_ID                     NUMBER(3),
    COLUMN_NAME                   VARCHAR2(15),
    DATA_TYPE                     VARCHAR2(16),
    PRECISION                     NUMBER(3),
    MAX_LENGTH                    NUMBER(6),
    SCALE                         NUMBER(3),
    IS_NULLABLE                   NUMBER(1),
    IS_IDENTITY                   NUMBER(1),
    IDENTITY_NAME                 VARCHAR2(50),
    SEED_VALUE                    NUMBER(3),
    INCREMENT_VALUE               NUMBER(3),
    LAST_VALUE                    NUMBER(3),
    DEFAULT_CONSTRAINT_NAME       VARCHAR2(51),
    DEFAULT_CONSTRAINT_TYPE       VARCHAR2(3),
    DEFAULT_CONSTRAINT_TYPE_DESC  VARCHAR2(61),
    DEFAULT_CONSTRAINT_DEFINITION VARCHAR2(4000),
    COLUMN_USER_NAME              VARCHAR2(100),
    COLUMN_PARENT_NAME            VARCHAR2(9),
    COLUMN_PARENT_COLUMN_NAME     VARCHAR2(16),
    COLUMN_PARENT_SUB_TYPE        VARCHAR2(9),
    COLUMN_COMMENTS               VARCHAR2(2001),
    COLUMN_UPDATED_FLAG           VARCHAR2(2),
    COLUMN_AUDITED_FLAG           VARCHAR2(2)
);
-- SQL*Plus handles CREATE TYPE like a PL/SQL unit: the ';' above does not run
-- it, the '/' on its own line does.
/
-- Collection type (TABLE OF SPCBH_V_COLUMNS_OBJECT); the anonymous test block
-- BULK COLLECTs its parsed column rows into a variable of this type.
CREATE OR REPLACE TYPE SPCBH_V_COLUMNS_TABLE AS TABLE OF SPCBH_V_COLUMNS_OBJECT;
-- CREATE TYPE needs a '/' on its own line to be executed by SQL*Plus.
/
下面是用于测试的匿名块:
-- Test script: loads an XML file through a BFILE, shreds /xml/table/columns/column
-- into SPCBH_V_COLUMNS_OBJECT rows with XMLTABLE, then prints the document length
-- and, per column, its id, name, parent column name and parent sub type.
-- Requires a database directory object named PWSTAGING that can reach the file.
DECLARE
    v_xml_path       VARCHAR2(200);
    v_parsed_columns SPCBH_V_COLUMNS_TABLE;
    ddlxmlstring     XMLTYPE;
    v_temp_xml_path  VARCHAR2(200);
    v_length         INT;
BEGIN
    v_xml_path := '\\ipcbhcs01\systems\231\PW6\Staging\PW-0231-70\AZLASRL.XML';

    -- Keep the path from its 4th backslash onward and switch to forward slashes,
    -- giving the file name that is passed to BFILENAME.
    v_temp_xml_path := REPLACE(SUBSTR(v_xml_path, INSTR(v_xml_path, '\', 1, 4), 200), '\', '/');

    -- Direct PL/SQL assignment; no SELECT ... FROM dual round-trip is needed.
    ddlxmlstring := XMLTYPE(
        BFILENAME('PWSTAGING', v_temp_xml_path),
        NLS_CHARSET_ID('WE8ISO8859P15')
    );

    WITH
    -- One row per /xml/table element. Only the <columns> fragment is consumed
    -- downstream, so it is the only column projected.
    table_data AS (
        SELECT td.columns
        FROM XMLTABLE('/xml/table'
                 PASSING (ddlxmlstring)
                 COLUMNS
                     table_name            VARCHAR2(50)  PATH '@table_name',
                     table_user_name       VARCHAR2(100) PATH 'table_user_name',
                     table_comments        CLOB          PATH 'table_comments',
                     table_file_sub_type   VARCHAR2(10)  PATH 'table_file_sub_type',
                     table_product         VARCHAR2(3)   PATH 'table_product',
                     table_directory       VARCHAR2(10)  PATH 'table_directory',
                     table_number          NUMBER(5)     PATH 'table_number',
                     table_prim_maint_view VARCHAR2(30)  PATH 'table_prim_maint_view',
                     table_stf_flag        VARCHAR2(2)   PATH 'table_stf_flag',
                     table_public          VARCHAR2(2)   PATH 'table_public',
                     table_updated_flag    VARCHAR2(2)   PATH 'table_updated_flag',
                     table_audited_flag    VARCHAR2(2)   PATH 'table_audited_flag',
                     columns               XMLTYPE       PATH 'columns',
                     indexes               XMLTYPE       PATH 'indexes'
             ) td
    ),
    -- One row per <column> element of each table's <columns> fragment.
    column_data AS (
        SELECT cd.*
        -- The comma join is intentional: the XMLTABLE is correlated to td
        -- through PASSING td.columns.
        FROM table_data td,
             XMLTABLE('/columns/column'
                 PASSING td.columns
                 COLUMNS
                     column_id                     NUMBER(3)      PATH '@column_id',
                     column_name                   VARCHAR2(15)   PATH 'column_name',
                     data_type                     VARCHAR2(16)   PATH 'data_type',
                     precision                     NUMBER(3)      PATH 'precision',
                     max_length                    NUMBER(6)      PATH 'max_length',
                     scale                         NUMBER(3)      PATH 'scale',
                     -- NUMBER(1) to match the SPCBH_V_COLUMNS_OBJECT attributes;
                     -- VARCHAR2(1) here relied on implicit conversion in the constructor.
                     is_nullable                   NUMBER(1)      PATH 'is_nullable',
                     is_identity                   NUMBER(1)      PATH 'is_identity',
                     identity_name                 VARCHAR2(50)   PATH 'identity_name',
                     seed_value                    NUMBER(3)      PATH 'seed_value',
                     increment_value               NUMBER(3)      PATH 'increment_value',
                     last_value                    NUMBER(3)      PATH 'last_value',
                     default_constraint_name       VARCHAR2(51)   PATH 'default_constraint_name',
                     default_constraint_type       VARCHAR2(3)    PATH 'default_constraint_type',
                     default_constraint_type_desc  VARCHAR2(61)   PATH 'default_constraint_type_desc',
                     default_constraint_definition VARCHAR2(4000) PATH 'default_constraint_definition',
                     column_user_name              VARCHAR2(100)  PATH 'column_user_name',
                     column_parent_name            VARCHAR2(9)    PATH 'column_parent_name',
                     column_parent_column_name     VARCHAR2(16)   PATH 'column_parent_column_name',
                     column_parent_sub_type        VARCHAR2(9)    PATH 'column_parent_sub_type',
                     column_comments               VARCHAR2(2001) PATH 'column_comments',
                     column_updated_flag           VARCHAR2(2)    PATH 'column_updated_flag',
                     column_audited_flag           VARCHAR2(2)    PATH 'column_audited_flag'
             ) cd
    )
    SELECT SPCBH_V_COLUMNS_OBJECT(
               cd.column_id,
               cd.column_name,
               cd.data_type,
               cd.precision,
               cd.max_length,
               cd.scale,
               cd.is_nullable,
               cd.is_identity,
               cd.identity_name,
               cd.seed_value,
               cd.increment_value,
               cd.last_value,
               cd.default_constraint_name,
               cd.default_constraint_type,
               cd.default_constraint_type_desc,
               cd.default_constraint_definition,
               cd.column_user_name,
               cd.column_parent_name,
               cd.column_parent_column_name,
               cd.column_parent_sub_type,
               cd.column_comments,
               cd.column_updated_flag,
               cd.column_audited_flag
           )
    BULK COLLECT INTO v_parsed_columns
    FROM column_data cd;

    -- Length of the serialized document, in characters.
    v_length := LENGTH(ddlxmlstring.getClobVal());
    DBMS_OUTPUT.PUT_LINE('length(DDLXMLSTRING):' || v_length);

    DBMS_OUTPUT.PUT_LINE(
        RPAD('COLUMN_ID', 10)
        || RPAD('COLUMN_NAME', 18)
        || RPAD('COLUMN_PARENT_COLUMN_NAME', 30)
        || RPAD('COLUMN_PARENT_SUB_TYPE', 30)
    );

    -- Only the four printed attributes are fetched; rows are listed by column id.
    FOR my_cursor IN (
        SELECT
            column_id,
            column_name,
            column_parent_column_name,
            column_parent_sub_type
        FROM TABLE(v_parsed_columns)
        ORDER BY column_id
    )
    LOOP
        -- NULL values are printed as the literal text 'NULL' so they show up in the report.
        DBMS_OUTPUT.PUT_LINE(
            RPAD(my_cursor.column_id, 10)
            || RPAD(my_cursor.column_name, 18)
            || RPAD(NVL(my_cursor.column_parent_column_name, 'NULL'), 30)
            || RPAD(NVL(my_cursor.column_parent_sub_type, 'NULL'), 30)
        );
    END LOOP;
END;
/
在位置 40000 处,XML 解析器似乎跳过了该空格,并将该值以 NULL 载入 XMLTYPE;在其他任何位置都不会发生这种情况。如果我把该值改为单个空格以外的内容,例如两个空格或“ ABC”,则可以正常工作。
XML 文档以及显示文档第 40,000 个字符处内容的屏幕截图位于:https://drive.google.com/open?id=124zDZYiNJnNzenQScgCbT3RKk7SApYPO
您将需要指向可用的数据库目录。
该问题的最小可复现示例如下:
-- Minimal reproduction: for several candidate values, build an XML document in
-- which the text of <b> starts right after a given character position, parse it
-- with XMLTABLE, and print what <b> is read back as.
DECLARE
    -- Builds <root><a>____...</a><b>{p_value}</b></root>, padding <a> with
    -- underscores so that (for p_position >= 16) the document is exactly
    -- p_position characters long just before p_value is appended.
    -- Prints: <length> characters then "<p_value>" gives "<parsed value of b>"
    --   p_position  document length, in characters, at which p_value is appended
    --   p_value     text placed inside the <b> element
    PROCEDURE testClob(
        p_position NUMBER,
        p_value    VARCHAR2
    )
    IS
        c_chunk  CONSTANT PLS_INTEGER := 4000;   -- padding is appended in 4000-character pieces
        l_doc    CLOB         := '<root><a>';
        l_middle VARCHAR2(7)  := '</a><b>';
        l_close  VARCHAR2(11) := '</b></root>';
        -- Characters already taken by the fixed markup that precedes p_value.
        l_markup NUMBER       := LENGTH( l_doc ) + LENGTH( l_middle );
        l_a      CLOB;
        l_b      VARCHAR2(10);
    BEGIN
        -- Whole 4000-character chunks of padding first ...
        FOR i IN 1 .. FLOOR( ( p_position - l_markup ) / c_chunk ) LOOP
            l_doc := l_doc || RPAD( '_', c_chunk, '_' );
        END LOOP;
        -- ... then the remainder (RPAD yields NULL for a zero length, which appends nothing).
        l_doc := l_doc || RPAD( '_', MOD( p_position - l_markup, c_chunk ), '_' );
        l_doc := l_doc || l_middle;

        DBMS_OUTPUT.PUT( LENGTH( l_doc ) || ' characters then "' || p_value || '" gives ' );

        l_doc := l_doc || p_value;
        l_doc := l_doc || l_close;

        SELECT a, b
        INTO   l_a, l_b
        FROM   XMLTABLE(
                   '/root'
                   PASSING XMLTYPE( l_doc )
                   COLUMNS
                       a CLOB         PATH 'a',
                       b VARCHAR2(10) PATH 'b'
               );

        DBMS_OUTPUT.PUT_LINE( '"' || l_b || '"' );
    END;
BEGIN
    testClob( 39998, '' );
    testClob( 39998, ' ' );
    -- Two spaces: the contrast case to the single space above. This call had
    -- become a byte-identical duplicate of the single-space call.
    testClob( 39998, '  ' );
    testClob( 39998, 'ABC' );
    testClob( 39998, 'A' );
END;
/
其输出为:
39998个字符,然后“”给出“”
39998个字符,然后“”给出“”
39998个字符,然后“”给出“”
39998个字符,然后“ ABC”给出“ ABC”
39998个字符,然后“ A”给出“ A”
db<>fiddle 在线演示见此处。
在 Oracle 18c 上的测试,以及提问者在 Oracle 12 上的测试,均未能重现此问题。
它似乎是 Oracle 11g 中的一个错误(更早的版本中也可能存在,但这未经测试);因此,您可以:
在 XMLTABLE、XMLQUERY 和 EXTRACTVALUE 上都会遇到相同的问题,因此似乎没有纯 SQL 的解决方案可用。您可以在数据库中嵌入一个 Java 函数来处理 XML 解析,但这会导致性能下降,而且对这个问题来说似乎有些反应过度。