如何在 Java 中将 ISO-2022-CN 文本转换为 UTF-8?

问题描述 投票:0回答:1

我有一个 Java 应用程序需要将以 ISO-2022-CN 编码的字符串转换为 UTF-8。但是,当我尝试使用以下代码执行此操作时:

new String("Text".getBytes("ISO-2022-CN"), StandardCharsets.UTF_8);

我得到一个 java.lang.UnsupportedOperationException。

经过一番研究,我得知 Java 中的 ISO-2022-CN 字符集只支持解码、不支持编码(因此对字符串调用 getBytes("ISO-2022-CN") 会抛出异常)。但是,我仍然需要将 ISO-2022-CN 字符串转换为 UTF-8。我怎样才能在 Java 中实现这一点?

我尝试使用以下代码:

import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;

/**
 * Demonstrates converting ISO-2022-CN encoded bytes to UTF-8.
 *
 * <p>The JDK's ISO-2022-CN charset can decode but cannot encode, so calling
 * {@code String.getBytes("ISO-2022-CN")} throws {@link UnsupportedOperationException}.
 * A Java {@code String} is already Unicode; the conversion therefore has to start from
 * the raw ISO-2022-CN <em>bytes</em>, decode them to characters, and then encode those
 * characters as UTF-8.
 */
public class Main {
  /**
   * Converts a small sample and prints the result.
   *
   * @param args ignored
   * @throws Exception if the sample bytes cannot be decoded or encoded
   */
  public static void main(String[] args) throws Exception {
    // ISO-2022-CN starts in ASCII mode, so the ASCII bytes of "Text" are valid
    // ISO-2022-CN input. Real input would arrive as raw bytes (file, socket, ...).
    byte[] iso2022cnBytes = "Text".getBytes("US-ASCII");

    byte[] utf8Bytes = toUtf8(iso2022cnBytes);

    String utf8String = new String(utf8Bytes, "UTF-8");
    System.out.println(utf8String);
  }

  /**
   * Re-encodes ISO-2022-CN bytes as UTF-8 bytes.
   *
   * @param iso2022cnBytes raw ISO-2022-CN encoded input
   * @return the same text encoded as UTF-8, sized exactly to the encoded content
   * @throws java.nio.charset.CharacterCodingException if the input is malformed or
   *     contains a character that cannot be mapped (both are reported, not ignored)
   */
  static byte[] toUtf8(byte[] iso2022cnBytes) throws java.nio.charset.CharacterCodingException {
    // Decode ISO-2022-CN to Unicode
    CharsetDecoder decoder = Charset.forName("ISO-2022-CN").newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    CharBuffer unicodeChars = decoder.decode(ByteBuffer.wrap(iso2022cnBytes));

    // Encode Unicode to UTF-8
    CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    ByteBuffer encoded = encoder.encode(unicodeChars);

    // The backing array may be larger than the encoded content, so copy only the
    // bytes between position and limit instead of using encoded.array() directly.
    byte[] utf8Bytes = new byte[encoded.remaining()];
    encoded.get(utf8Bytes);
    return utf8Bytes;
  }
}

但是这段代码对我也不起作用。

任何人都可以提出解决方案或提供替代方法来将 ISO-2022-CN 文本转换为 Java 中的 UTF-8 吗?

java character-encoding
1个回答
0
投票
    /*
java.nio.charset.Charset defchar = java.nio.charset.Charset.defaultCharset();
String charsetInputName = defchar.name(); // get default OS system underlying base charset code page
//  "UTF_8";  "GB18030"  //java.nio.charset.StandardCharsets.UTF_8.name(); // "GB2312";  ///  "UTF_16";  // "UTF_8";  "GB18030"
The FOLLOWING is a middle-hop re-encode from GB18030 (PRC Simplified Chinese, carrying an English subset) to GB2312 (web Simplified Chinese with an English subset),
THEN from the middle hop (GB2312) web charset to a standard English charset (UTF_8)
String charsetFrom = "GB2312"; 
String charsetNameto = java.nio.charset.StandardCharsets.UTF_8.name(); //"GB2312"; US_ASCII   UTF_8
String charout = reEncodechars(java.util.Locale.ENGLISH,headerOrigin,"GB18030","GB2312");
charout = reEncodechars(java.util.Locale.ENGLISH,charout,charsetFrom,charsetNameto);
     */
    /**
     * Creates a new decoder for the named charset.
     *
     * @param Iname name of the charset to decode from, as accepted by
     *     {@link java.nio.charset.Charset#forName(String)}
     * @return a freshly created decoder for that charset
     */
    protected synchronized java.nio.charset.CharsetDecoder setCharsetDecoder(String Iname){
        final java.nio.charset.Charset sourceCharset = java.nio.charset.Charset.forName(Iname);
        final java.nio.charset.CharsetDecoder freshDecoder = sourceCharset.newDecoder();
        return freshDecoder;
    }//enmeth

    
    
    
    /**
     * Creates a new encoder for the named charset.
     *
     * @param Outname name of the charset to encode to, as accepted by
     *     {@link java.nio.charset.Charset#forName(String)}
     * @return a freshly created encoder for that charset
     */
    protected synchronized java.nio.charset.CharsetEncoder setCharsetEncoder(String Outname){
        final java.nio.charset.Charset targetCharset = java.nio.charset.Charset.forName(Outname);
        final java.nio.charset.CharsetEncoder freshEncoder = targetCharset.newEncoder();
        return freshEncoder;
    }//enmeth

    
    
    
    /**
     * Serializes the text to bytes in the given charset and decodes those bytes back
     * into a character buffer using a decoder that ignores bad input.
     *
     * @param locum locale handed to {@code String.format} for the "%s" pass over the text
     * @param headerOrigin text to process
     * @param charsetInputName charset name used both for {@code getBytes} and for decoding
     * @return the decoded characters
     * @throws java.io.UnsupportedEncodingException if {@code getBytes} does not know the charset name
     * @throws java.nio.charset.CharacterCodingException if decoding fails
     */
    protected synchronized java.nio.CharBuffer getCharInBuffer(java.util.Locale locum, String headerOrigin, String charsetInputName)throws java.io.UnsupportedEncodingException, java.nio.charset.CharacterCodingException{
        // Run the text through a locale-aware "%s" format before any charset work.
        final String formattedText = String.format(locum, "%s", headerOrigin);

        // Malformed and unmappable input is dropped rather than reported.
        final java.nio.charset.CharsetDecoder lenientDecoder = setCharsetDecoder(charsetInputName);
        lenientDecoder.onMalformedInput(java.nio.charset.CodingErrorAction.IGNORE);
        lenientDecoder.onUnmappableCharacter(java.nio.charset.CodingErrorAction.IGNORE);

        final byte[] rawBytes = formattedText.getBytes(charsetInputName);
        final java.nio.ByteBuffer wrappedBytes = java.nio.ByteBuffer.wrap(rawBytes);
        return lenientDecoder.decode(wrappedBytes);
    }//enmeth

    
    
    
    /**
     * Converts text by decoding it via {@code getCharInBuffer} with the input charset and
     * re-encoding the result with the target charset, then rebuilding a String from the
     * encoded bytes.
     *
     * <p>Characters the target encoder cannot handle are ignored. Checked coding or
     * encoding failures are printed and result in an empty string (best-effort behavior).
     *
     * @param locum locale handed to {@code String.format} for the "%s" passes
     * @param headerOrigin text to convert
     * @param charsetInputName name of the charset the text is decoded with
     * @param charsetnameto name of the charset the text is encoded to
     * @return the converted text, or {@code ""} if a checked coding/encoding error occurred
     */
    protected synchronized String inChars(java.util.Locale locum, String headerOrigin, String charsetInputName, String charsetnameto){
        String output = "";
        try {
            java.nio.charset.CharsetEncoder chunda = setCharsetEncoder(charsetnameto);
            chunda.onMalformedInput(java.nio.charset.CodingErrorAction.IGNORE);
            chunda.onUnmappableCharacter(java.nio.charset.CodingErrorAction.IGNORE);
            java.nio.ByteBuffer encoded = chunda.encode(getCharInBuffer(locum, headerOrigin, charsetInputName));
            // The encoder's backing array is sized from an estimate and can be larger than
            // the encoded content; reading the whole array would append NUL characters.
            // Use only the bytes between position and limit.
            output = new String(encoded.array(), encoded.arrayOffset() + encoded.position(), encoded.remaining(), charsetnameto);
            output = String.format(locum, "%s", output);
        } catch (java.nio.charset.CharacterCodingException ex){
            ex.printStackTrace();
        } catch (java.io.UnsupportedEncodingException unsuppode){
            unsuppode.printStackTrace();
        }
        return output;
    }//enmeth

    
    
    
// WARNING must have the charsets encodings installed available
/**
 * Converts the text one Unicode code point at a time through {@code inChars}, then
 * round-trips the joined result through the target charset.
 *
 * <p>Iterating by code point (not by {@code char}) keeps surrogate pairs together;
 * splitting them would hand lone surrogates to the converter and corrupt supplementary
 * characters. The named charsets must be available in the running JVM.
 *
 * @param locum locale handed to {@code String.format} for the "%s" passes
 * @param headerOrigin text to convert; must not be {@code null}
 * @param charsetInputName name of the charset the text is decoded with
 * @param charsetnameto name of the charset the text is encoded to
 * @return the converted text, or {@code ""} if the final {@code getBytes} does not
 *     recognize {@code charsetnameto}
 */
protected synchronized String doEncodechars(java.util.Locale locum, String headerOrigin, String charsetInputName, String charsetnameto){
    final int lenorig = headerOrigin.length();
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    StringBuilder retrieve = new StringBuilder(lenorig);
    int xz = 0;
    while (xz < lenorig){
        // One or two chars, depending on whether this position starts a surrogate pair.
        int width = Character.charCount(headerOrigin.codePointAt(xz));
        String sti = headerOrigin.substring(xz, xz + width);
        retrieve.append(inChars(locum, sti, charsetInputName, charsetnameto));
        xz += width;
    }//enwh
    String retrieve2 = "";
    try{
        byte[] retr2 = retrieve.toString().getBytes(charsetnameto);
        retrieve2 = new String(retr2, java.nio.charset.Charset.forName(charsetnameto));
    } catch (java.io.UnsupportedEncodingException ex){
        ex.printStackTrace();
    }
    return String.format(locum, "%s", retrieve2);
}//enmeth


/**
 * Public entry point for re-encoding text; delegates to {@code doEncodechars}.
 *
 * @param locum locale forwarded to the conversion
 * @param headerOrigin text to convert
 * @param charsetInputName name of the source charset
 * @param charsetnameto name of the target charset
 * @return whatever {@code doEncodechars} returns for the same arguments
 */
public synchronized String reEncodechars(java.util.Locale locum, String headerOrigin, String charsetInputName, String charsetnameto){
    final String reencoded = doEncodechars(locum, headerOrigin, charsetInputName, charsetnameto);
    return reencoded;
}//enmeth
© www.soinside.com 2019 - 2024. All rights reserved.