2016-09-26 2 views
0

CSV를 XML 데이터로 변환하려고했습니다. 다양한 예제를 통해 CSV 파일을 구문 분석하고 XML 파일을 가져 오는 코드를 작성할 수있었습니다. 그러나 필자가 작성한 코드는 잘못된 태그가있는 XML 파일을 반환합니다.CSV to XML 변환 Java

이 변환에 대한 코드입니다 :

package com.adarsh.parse; 
import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileReader; 
import java.io.IOException; 
import java.util.StringTokenizer; 
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.FactoryConfigurationError; 
import javax.xml.parsers.ParserConfigurationException; 
import javax.xml.transform.OutputKeys; 
import javax.xml.transform.Result; 
import javax.xml.transform.Source; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 
import org.w3c.dom.Document; 
import org.w3c.dom.Element; 



public class Converter { 

    /* Protected members to avoid instantiation */ 
    protected DocumentBuilderFactory domFactory = null; 
    protected DocumentBuilder domBuilder = null; 
    /* Constant strings */ 
    // Input CSV file 
    final String INPUT_FILE = "sample_data.csv"; 
    // Output XML document 
    final String OUTPUT_FILE ="in.xml"; 
    // First element in the XML document 
    final String FIRST_ELEMENT="school"; 
    public Converter(){ 
     try { 
      domFactory = DocumentBuilderFactory.newInstance(); 
      /* Obtaining instance of class DocumentBuilder */ 
      domBuilder = domFactory.newDocumentBuilder(); 
     } 
     catch(ParserConfigurationException exp) { 
      System.err.println(exp.toString()); 

     } 
     catch(FactoryConfigurationError exp){ 
      System.err.println(exp.toString()); 
     } 
     catch(Exception exp){ 
      System.err.println(exp.toString()); 
     } 
    } 
    /** 
    * This method converts the given CSV file into an XML document 
    */ 
    public int convert(String csvFileName, String xmlFileName) { 
     int rowCount = -1; 
     try { 
      /* Initializing the XML document */ 
      Document newDoc = domBuilder.newDocument(); 
      /* Creating the root element in the XML */ 
      Element rootElem = newDoc.createElement(FIRST_ELEMENT); 
      newDoc.appendChild(rootElem); 
      /* Reading the CSV file */ 
      BufferedReader csvFileReader; 
      csvFileName = INPUT_FILE; 
      csvFileReader = new BufferedReader(new FileReader(csvFileName)); 
      /* Initialize the number of fields to 0 */ 
      int fieldCount = 0; 
      String[] csvFields = null; 
      StringTokenizer stringTokenizer = null; 

      /** 
      * Map the column names in the CSV file as the elements in the XML 
      * document, eliminate any other characters not eligible for XML element 
      * naming 
      */ 
      /* Initialize the current line variable */ 
      String currLine = csvFileReader.readLine(); 
      /* Loop until we reach the end of the file 
      * edge case: Empty CSV file 
      * */ 

      if(currLine != null) { 
       /* Separate fields based on commas */ 
       stringTokenizer = new StringTokenizer(currLine, ","); 
       fieldCount = stringTokenizer.countTokens(); 
       /* If there is data in the CSV file */ 
       if(fieldCount > 0) { 
        /* Initialize a String Array of Fields */ 
        csvFields = new String[fieldCount]; 
        int i = 0; 
        /* Loop till all elements are found and save fields */ 
        while (stringTokenizer.hasMoreElements()) { 
         csvFields[i++] = String.valueOf(stringTokenizer.nextElement()); 
        } 
       } 

      } 
      else { 
       System.out.println("Nothing to parse"); 
      } 
      /* reading rows from the CSV file */ 
      while((currLine = csvFileReader.readLine()) != null) { 
       stringTokenizer = new StringTokenizer(currLine, ","); 
       fieldCount = stringTokenizer.countTokens(); 
       /* if rows exist in the CSV file*/ 
       if(fieldCount > 0) { 
        /* Create the row element*/ 
        Element rowElem = newDoc.createElement("row"); 
        int i = 0; 
        /* until there are more elements*/ 
        while(stringTokenizer.hasMoreElements()) { 
         try { 
          /* Append each element found to each row element*/ 
          String currValue = String.valueOf(stringTokenizer.nextElement()); 
          Element currElem = newDoc.createElement(csvFields[i++]); 
          currElem.appendChild(newDoc.createTextNode(currValue)); 
          rowElem.appendChild(currElem); 
         } 
         catch(Exception exp) { 

         } 
        } 
        /* Append the rows to the root element*/ 
        rootElem.appendChild(rowElem); 
        rowCount++; 
       } 
      } 
      /* Finish reading the CSV file */ 
      csvFileReader.close(); 

      /* Saving the generated XML doc into required format file to disk */ 
      TransformerFactory tranFactory = TransformerFactory.newInstance(); 
      Transformer aTransformer = tranFactory.newTransformer(); 
      aTransformer.setOutputProperty(OutputKeys.INDENT, "yes"); 
      aTransformer.setOutputProperty(OutputKeys.METHOD, "xml"); 
      aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); 
      Source src = new DOMSource(newDoc); 
      xmlFileName = OUTPUT_FILE; 
      Result dest = new StreamResult(new File(xmlFileName)); 
      aTransformer.transform(src, dest); 

      rowCount++; 
     } 
     catch(IOException exp) { 
      System.err.println(exp.toString()); 
     } 
     catch(Exception exp) { 
      System.err.println(exp.toString()); 
     } 
     /* Number of rows parsed into XML */ 
     return rowCount; 

    } 

} 

이 파일의 샘플 CSV 데이터입니다 :

classroom_id, CLASSROOM_NAME, teacher_1_id, teacher_1_last_name, teacher_1_first_name, teacher_2_id, teacher_2_last_name, teacher_2_first_name, student_id, student_last_name, student_first_name, student_grade 103, Brian 's Homeroom, 10300000001, O'Donnell, Brian,,,,, 102, 스미스의 PhysEd 클래스, 10200000001, 스미스, 아서, 10200000011, Patterson, John, 10200000011, McCrancy, Brandon, 1 102, Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000018, Reginald, Alexis, 1 102, Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000019, Gayle, Matthew, 10210, Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000010, Smith, Nathaniel, 1 102 Smith Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000013, Lanni, Erica, 1 102, Mr. Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000014 플로레스, 마이클, 1 102, 스미스 씨의스미스 (Smith)의 PhysEd 클래스, 10200000001, 스미스, 아서, 10200000011, 패터슨, 요하네스, 10200000016, 페레즈 (Perez)의 페레즈 (Perez) 소재의 페레즈 (Perez) Brittany, 1 102, Smith의 PhysEd 클래스, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000015, Hill, Jasmin, 1 102, Smith의 PhysEd 클래스, 10200000001, Smith, Arthur,, Patterson , John, 10200000017, Hiram, William, 101101, Mrs. Barbara,,, 10100000014, Garcia, Lizzie, 1 101, Mrs. Jones '수학 Class, 10100000001, Jones, Barbara,,, 10100000013, Mercado, Toby, 1 101, Mrs. Jones의 수학 수업, 10100000001, Jones, Barbara,, 10100000011, Gutierrez, Kimberly, 2 101, Mrs. 존스의 수학 클래스, 10100000001, 존스, 바바라,,,, 10100000010, 길, 마이클, 2

내가 XML 파일에 다음과 같은 출력을 얻기 위해 기다리고 있었다 :

<grade id="1"> 
    <classroom id="101" name="Mrs. Jones' Math Class"> 
     <teacher id="10100000001" first_name="Barbara" last_name="Jones"/> 

     <student id="10100000010" first_name="Michael" last_name="Gil"/> 
     <student id="10100000011" first_name="Kimberly" last_name="Gutierrez"/> 
     <student id="10100000013" first_name="Toby" last_name="Mercado"/> 
     <student id="10100000014" first_name="Lizzie" last_name="Garcia"/> 
     <student id="10100000015" first_name="Alex" last_name="Cruz"/> 
    </classroom> 


    <classroom id="102" name="Mr. Smith's PhysEd Class"> 
     <teacher id="10200000001" first_name="Arthur" last_name="Smith"/> 
     <teacher id="10200000011" first_name="John" last_name="Patterson"/> 

     <student id="10200000010" first_name="Nathaniel" last_name="Smith"/> 
     <student id="10200000011" first_name="Brandon" last_name="McCrancy"/> 
     <student id="10200000012" first_name="Elizabeth" last_name="Marco"/> 
     <student id="10200000013" first_name="Erica" last_name="Lanni"/> 
     <student id="10200000014" first_name="Michael" last_name="Flores"/> 
     <student id="10200000015" first_name="Jasmin" last_name="Hill"/> 
     <student id="10200000016" first_name="Brittany" last_name="Perez"/> 
     <student id="10200000017" first_name="William" last_name="Hiram"/> 
     <student id="10200000018" first_name="Alexis" last_name="Reginald"/> 
     <student id="10200000019" first_name="Matthew" last_name="Gayle"/> 
    </classroom> 

    <classroom id="103" name="Brian's Homeroom"> 
     <teacher id="10300000001" first_name="Brian" last_name="O'Donnell"/> 
    </classroom> 
</grade> 
<?xml version="1.0" encoding="UTF-8" standalone="no"?> 
<school> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>103</classroom_id> 
    </row> 
</school> 

그래서 사람이 나를 도와주세요 수 : 403,210

이 내가 현재 출력을 얻고 어떻게? 내가 어디로 잘못 가고 있는지 궁금해하고 있었다. 감사합니다

추신나는 이미 stackoverflow에서 XML 변환에 대한 CSV에 관한 다른 질문을 언급했다. 그러나, 나는 나에게 특정한 문제에 대한 적절한 해결책이나 설명을 찾을 수 없었다.

P.S. XML과 같은 CSV 데이터를 구문 분석하는 것이 의무가 아닌 경우 XSLT를 사용하도록 제안하지 마십시오. 다른 선택의 여지가 없다면 XSLT에 대한 지식이 거의 없으므로 XSLT를 배워야 할 것입니다. 이미 작성한 코드에서 변경 사항을 제안하면 많은 도움이 될 것입니다.

답변

0

CSV 콘텐츠에 개행 구분 기호가없는 것으로 보입니다.