Thursday, October 22, 2015

Java, XML and Postgres

I'm migrating from Oracle 11g to Postgres for an ongoing project. Since I found few really helpful code examples online about how to load XML into Postgres using Java, I am posting my method here. It's a first run at it that worked. Enhancements will come later so please keep the hating to a minimum, there's always other ways of coding.

package edu.harvard.hul.lts;

import java.sql.*;
import java.io.*;
import java.lang.*;
import java.util.*;

public class Main {

    private static String driver = "";
    private static String instance = "";
    private static String user = "";
    private static String pass = "";

    public static void main(String[] args) {

        // Read the properties file         
        try {
            getProps();
        } catch (IOException e) {
            e.printStackTrace();
        }

        boolean proc_gsNew = false;

        // Get an array of all TABLE_NAMEs and get/set the Max ID. 
        int maxID = 0;
        List<gsNew> list = new ArrayList<gsNew>();

        if (proc_gsNew) {

            try {
                maxID = getMaxID("GS_NEW");
            } catch (IOException e) {
                e.printStackTrace();
            }

            if (maxID == 0) {
                System.out.println("[ERROR ASSIGNING MAX ID] Exiting...");
                System.exit(0);
            }
            try {
                list = getRecords(list, 9700, 11);
            } catch (IOException e) {
                e.printStackTrace();
            }

            for (gsNew o: list) {
                try {
                    o.MET_XML = getFGDCMetadataAsUTF8String(
                        o.TABLE_NAME);
                } catch (Exception xx) {
                    System.out.println("[MAJOR ERROR OCCURRED]\n");
                    xx.printStackTrace();
                }
            }

            // Next, add 1 record/object at a time to Postgres             
            for (gsNew o: list) {

                try {
                    int result = insertXmlPgObj( maxID, o );
                    maxID++;
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            list = null;
            maxID = 0;
        }
        System.out.println("End....");
        /*                Postgres calls         */ 
         String url = "jdbc:postgresql://localhost/OGP";
        String user = "postgres";
        String password = "postgres";

        Connection con = null;
        Statement st = null;
        ResultSet rs = null;

        try {
            con = DriverManager.getConnection(url, user, password);
            st = con.createStatement();
            rs = st.executeQuery("SELECT * FROM \"GS_NEW\"");

            while (rs.next()) {
                System.out.println(rs.getString(2)+"\n"+rs.getString(3));
            }

        } catch (SQLException ex) {
            System.out.println("SEVERE");
            ex.printStackTrace();
        } finally {
            try {
                if (rs != null) {
                    rs.close();
                }
                if (st != null) {
                    st.close();
                }
                if (con != null) {
                    con.close();
                }

            } catch (SQLException ex) {
                System.out.println("SEVERE");
                ex.printStackTrace();
            }
        }
    }

    private static void getProps() throws IOException {
        InputStream inputStream = null;
        Properties prop = new Properties();

        try {
            String propFileName = "project.properties";
            inputStream = Main.class.getClassLoader().getResourceAsStream(propFileName);

            if (inputStream != null) {
                prop.load(inputStream);
            } else {
                throw new FileNotFoundException("property file '" + propFileName + "' not found in the classpath");
            }

            // get the property value and print it out             
            instance = prop.getProperty("db.instance");
            driver = prop.getProperty("db.driver");
            user = prop.getProperty("db.user");
            pass = prop.getProperty("db.pass");

            inputStream.close();

        } catch (Exception e) {
            System.out.println("Exception: " + e);
        }
    }

    /*     * Call: Add data to the GS_NEW Postgres table 
     * @param int ID Start ID of record (primary key) 
     * @param gsNew o; List of gsNew objects that were already populated 
     * @return int: Status code of success 
     */     
    public static int insertXmlPgObj( int ID, gsNew o ) throws IOException {

        Connection con = null;
        Statement st = null;
        ResultSet rs = null;


        String url = "jdbc:postgresql://localhost/OGP?stringtype=unspecified";  
        String user = "postgres";
        String password = "postgres";

        String theDTD = "<!DOCTYPE metadata SYSTEM \"http://hgl.harvard.edu:8080/HGL/html/fgdc-std-001-1998.dtd\">";
        String theFGDCDTD = "<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">";
        //String theXMLDtd = "\uFEFF<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 
        String theXMLDtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

        o.MET_XML = o.MET_XML.replace(theFGDCDTD, "");
        o.MET_XML = o.MET_XML.replace(theDTD, "");
        o.MET_XML = o.MET_XML.replace(theXMLDtd, "");

        String query;
        PreparedStatement pstmt = null;
        Statement stmt = null;
        int result = 1;

        try {
            con = DriverManager.getConnection(url, user, password);

            query = "INSERT INTO \"GS_NEW\"" +
                    "(\"ID\", \"TABLE_NAME\", \"FGDC\", \"DESCRIPTION\", \"PUBID\", " +
                    "\"GEOMETRY_TYPE\", \"PUB_XML\", \"PUB_245A\", \"PUB_520A\", \"HGL_LAYER_ID\"," +
                    "\"ACCESS_FLAG\"," +
                    "\"MINX\", \"MINY\", \"MAXX\", \"MAXY\", \"DATA_TYPE\") " +
                    "VALUES (?, ?, xml(?), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

            pstmt = con.prepareStatement(query);  
          pstmt.setInt(1, o.ID);
          pstmt.setString(2, o.TABLE_NAME);  
          pstmt.setString(3, o.MET_XML);
            pstmt.setString(4, o.DESCRIPTION);
            pstmt.setInt(5, o.PUBID);
            pstmt.setInt(6, o.GEOMETRY_TYPE);
            pstmt.setString(7, o.PUB_XML);
            pstmt.setString(8, o.PUB_245A);
            pstmt.setString(9, o.PUB_520A);
            pstmt.setInt(10, o.HGL_LAYER_ID);
            pstmt.setString(11, o.ACCESS_FLAG);
            pstmt.setFloat(12, o.MINX);
            pstmt.setFloat(13, o.MINY);
            pstmt.setFloat(14, o.MAXX);
            pstmt.setFloat(15, o.MAXY);
            pstmt.setString(16, o.DATA_TYPE);

            if (pstmt.executeUpdate() == 1) {
                result = 1;
                System.out.println (ID+" Successfully updated the XML record.");
            } else {
                result = 2;
            }

            con.close();

        } catch (SQLException e) {
            System.out.println ("Problems updating the XML record for table: "+o.TABLE_NAME+". The record probably does not exist in gs_new.");
            e.printStackTrace();
            return result;
        }

        return result;
    } 
 
    public static int insertXML(String fileNameAndPath, String tableName, int ID) throws IOException {

        Connection con = null;
        Statement st = null;
        ResultSet rs = null;


        String url = "jdbc:postgresql://localhost/OGP?stringtype=unspecified"; 
        String user = "postgres";
        String password = "postgres";

        String theDTD = "<!DOCTYPE metadata SYSTEM \"http://hgl.harvard.edu:8080/HGL/html/fgdc-std-001-1998.dtd\">";
        String theFGDCDTD = "<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">";
        //String theXMLDtd = "\uFEFF<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 
        String theXMLDtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

        int status = 0;

        // Change fileNameAndPath to the tempfile 
        String tempfileName = "tempXMLFile";

        // First read the data as UTF8 
        String xmlData = readUTF8InputFile(fileNameAndPath);

        // Create temp file 
        File tempFile = null;
        tempFile = File.createTempFile(tempfileName, ".tmp" );
        tempfileName = tempFile.getAbsolutePath();

        // Remove the tempfile if present in project 
        boolean isDeleted = tempFile.delete();

        // Remove the <!DOCTPE> 
        xmlData = xmlData.replace(theFGDCDTD, "");
        xmlData = xmlData.replace(theDTD, "");
        xmlData = xmlData.replace(theXMLDtd, "");

        // Write xmlData to the tempfile as UTF8 { For the purpose of checking data after loading } 
      Writer out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(tempFile), "UTF8"));
        out.append(xmlData);
        out.flush();
        out.close();

        //System.out.print(xmlData);
        String query;
        PreparedStatement pstmt = null;
        Statement stmt = null;
        int result = 1;

        try {
            con = DriverManager.getConnection(url, user, password);

            // MODIFIED FOR TESTING 
            query = "INSERT INTO \"GS_NEW\"(\"ID\", \"TABLE_NAME\", \"FGDC\") VALUES (?, ?, xml(?))";

            pstmt = con.prepareStatement(query); // create a statement 
            pstmt.setInt(1, ID); // set input parameter 1 
            pstmt.setString(2, tableName); // set input parameter 2 
            pstmt.setString(3, xmlData);

            if (pstmt.executeUpdate() == 1) {
                result = 1;
                System.out.println ("Successfully updated the XML record.");
            } else {
                result = 2;
            }

        } catch (SQLException e) {
            System.out.println ("Problems updating the XML record. The record probably does not exist in gs_new.");
            e.printStackTrace();
            return result;
        }

        return result;
    }

    /* 
     * Call: String fileData = readUTF8InputFile("/Users/Documents/Files/meta/NHGIS_POP1810.xml"); 
     * @param fileName String The name of the xml file to read 
     * @return String A String holding the contents of the input file 
    */ 
    private static String readUTF8InputFile(String fileName) {

        System.out.println("Entering hu.readUTF8InputFile()....");
        String UTF8Str = "";
        String theDTD = "<!DOCTYPE metadata SYSTEM \"http://hgl.harvard.edu:8080/HGL/html/fgdc-std-001-1998.dtd\">";
        String theFGDCDTD = "<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">";

        try {

            File fileDir = new File(fileName);
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileDir), "UTF8"));

            String str;

            while ((str = in.readLine()) != null) {
                UTF8Str += str;
            }

            in.close();

            // If the result string has the typical DOCTYPE then remove it (theDTD or theFGDCDTD) because 
            if (UTF8Str.indexOf(theDTD) > 0)
                UTF8Str = UTF8Str.replace(theDTD, "");
            if (UTF8Str.indexOf(theFGDCDTD) > 0)
                UTF8Str = UTF8Str.replace(theFGDCDTD, "");

        }  catch (UnsupportedEncodingException e) {
            System.out.println(e.getMessage());
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }

        System.out.println("Exiting hu.readUTF8InputFile()....");

        return UTF8Str;
    }

    /** Get FGDC metadata for a downloadable HGL resource. 
     * @param LayerName - HGL_TABLE_NAME from the ArcSDE LAYERS table 
     * @return String The metadata XML 
     */ 
    public static String getFGDCMetadataAsUTF8String(String LayerName) throws IOException {

        String theDTD = "<!DOCTYPE metadata SYSTEM \"http://hgl.harvard.edu:8080/HGL/html/fgdc-std-001-1998.dtd\"><metadata>";
        String fgdcRecord = "";
        String resultLine = "UNSET";

        java.sql.Connection _con = null;

        //String theQuery = "select e.met_xml.getClobval() as myxml from gs_new e WHERE TABLE_NAME = '"+LayerName.trim()+"'";         
        String theQuery = "select e.met_xml.getBlobVal(1) as myxml from gs_new e WHERE TABLE_NAME = '"+LayerName.trim()+"'";

        try {

            Class.forName(driver).newInstance();
            _con = java.sql.DriverManager.getConnection(instance, user, pass);

            Statement stmnt = _con.createStatement();
            ResultSet rs = stmnt.executeQuery(theQuery);

            if (rs.next()) {
                fgdcRecord = getBlobAsString(rs.getBlob("myxml"));
            }

            rs.close();
            stmnt.close();
            _con.close();

        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        fgdcRecord = fgdcRecord.replaceFirst("<metadata>", theDTD);
        return fgdcRecord;

    }

    private static String getBlobAsString(Blob blob) {
        StringBuffer result = new StringBuffer();

        if ( blob != null ) {
            int read = 0;
            Reader reader = null;
            char[] buffer = new char[1024];

            try {
                reader = new InputStreamReader(blob.getBinaryStream(), "UTF8");

                while((read = reader.read(buffer)) != -1) {
                    result.append(buffer, 0, read);
                }
            } catch(SQLException ex) {
                throw new RuntimeException("Unable to read blob data.", ex);
            } catch(IOException ex) {
                throw new RuntimeException("Unable to read blob data.", ex);
            } finally {
                try { if(reader != null) reader.close(); } catch(Exception ex) {};
            }
        }

        return result.toString();
    }

    /** Get a table_name of a particular layer 
     * @param int layerID Layer ID to look at 
     * @return String A table_name string 
     */ 
     public static String getTablename(int layerID) throws IOException {

        String layerName = "";

        java.sql.ResultSet _resultSet = null;
        java.sql.Connection _con = null;

        String theQuery = "SELECT TABLE_NAME FROM GS_NEW WHERE HGL_LAYER_ID = " + layerID;

        try {
            Class.forName(driver).newInstance();
            _con = java.sql.DriverManager.getConnection(instance, user, pass);

            Statement stmnt = _con.createStatement();
            ResultSet rs = stmnt.executeQuery(theQuery);

            if (rs.next()){
                layerName = rs.getString("TABLE_NAME");
            }

            rs.close();
            stmnt.close();
            _con.close();

        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return layerName;
    }

    /** 
     * @return Unordered <List> of table names starting from 'start' location 
     * @param int start: Start position of ID in table 
    */ 
    public static List getTableNamesAsList(int start, int limit) throws IOException {

        List<String> list = new ArrayList<String>();

        java.sql.ResultSet _resultSet = null;
        java.sql.Connection _con = null;

        String theQuery = "SELECT TABLE_NAME FROM GS_NEW WHERE ID > "+start+" AND ROWNUM <= "+ limit +" ORDER BY TABLE_NAME";

        try {
            Class.forName(driver).newInstance();
            _con = java.sql.DriverManager.getConnection(instance, user, pass);

            Statement stmnt = _con.createStatement();
            ResultSet rs = stmnt.executeQuery(theQuery);

            while (rs.next()) {
                list.add( rs.getString("TABLE_NAME"));
            }

            rs.close();
            stmnt.close();
            _con.close();

        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return list;
    }

    /** 
     * @return Unordered <List> of table data starting from 'start' location 
     * @param list A gsNew object that you want populated with data 
     * @param int start: Start position of ID in table 
     * @param int limit: The number of records that you want returned 
     */ 
    public static List getRecords(List<gsNew> list, int start, int limit) throws IOException {

        java.sql.ResultSet _resultSet = null;
        java.sql.Connection _con = null;

        String theQuery = "SELECT * FROM GS_NEW WHERE ID > "+start+" AND ROWNUM <= "+ limit +" ORDER BY TABLE_NAME";

        try {
            Class.forName(driver).newInstance();
            _con = java.sql.DriverManager.getConnection(instance, user, pass);

            Statement stmnt = _con.createStatement();
            ResultSet rs = stmnt.executeQuery(theQuery);

            while (rs.next()) {

                gsNew obj = new gsNew();

                obj.TABLE_NAME = rs.getString("TABLE_NAME");
                obj.DESCRIPTION = rs.getString("DESCRIPTION");
                obj.PUB_245A = rs.getString("PUB_245A");
                obj.PUB_520A = rs.getString("PUB_520A");
                obj.DATA_TYPE = rs.getString("DATA_TYPE");
                obj.ACCESS_FLAG = rs.getString("ACCESS_FLAG");
                obj.PUB_XML = rs.getString("PUB_XML");

                obj.ID = rs.getInt("ID");
                obj.PUBID = rs.getInt("PUBID");
                obj.GEOMETRY_TYPE = rs.getInt("GEOMETRY_TYPE");
                obj.HGL_LAYER_ID = rs.getInt("HGL_LAYER_ID");

                obj.MINX = rs.getFloat("MINX");
                obj.MINY = rs.getFloat("MINY");
                obj.MAXX = rs.getFloat("MAXX");
                obj.MAXY = rs.getFloat("MAXY");

                list.add( obj );

            }

            rs.close();
            stmnt.close();
            _con.close();

        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return list;
    }


    /** Get the max ID from table passed 
    * @return int MaxID of records 
    */
    public static int getMaxID(String tableName) throws IOException {

        int maxID = 0;
        java.sql.ResultSet _resultSet = null;
        java.sql.Connection _con = null;

        String theQuery = "SELECT MAX(ID) AS MAX FROM "+tableName;

        try {
            Class.forName(driver).newInstance();
            _con = java.sql.DriverManager.getConnection(instance, user, pass);

            Statement stmnt = _con.createStatement();
            ResultSet rs = stmnt.executeQuery(theQuery);

            if (rs.next()) {
                maxID = rs.getInt("MAX");
            }

            rs.close();
            stmnt.close();
            _con.close();

        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return maxID;
    }

}