Wednesday 31 July 2013

Convert Word document to Images


Aspose provides a Java library (called Aspose.Words) for working with Microsoft Word documents: inserting content, retrieving content, and exporting documents to images. So let's talk about converting .doc and .docx files to images (PNG and JPEG formats).

Here is the link from which we can download the latest Aspose.Words jar:
http://www.aspose.com/java/word-component.aspx
Click on the download button.

Generate images of .doc/.docx file


 /**
  * Saves each page of the document at {@code sourcePath} as a separate JPEG file.
  *
  * <p>The page with index {@code i} (starting at 0) is written to
  * {@code sourcePath + "_output_" + i + ".jpeg"}. Any exception raised while loading or
  * saving is caught here and its stack trace is printed; nothing is rethrown.
  *
  * @param sourcePath path of the document to load; also used as the prefix of every output file
  */
 public void generateImages(final String sourcePath) {
      try {
           final Document document = new Document(sourcePath);

           final ImageSaveOptions saveOptions = new ImageSaveOptions(SaveFormat.JPEG);
           saveOptions.setJpegQuality(100);
           saveOptions.setResolution(100);
           saveOptions.setUseHighQualityRendering(true);

           int pageIndex = 0;
           while (pageIndex < document.getPageCount()) {
                // Point the shared options at the current page, then save under an indexed name.
                saveOptions.setPageIndex(pageIndex);
                document.save(sourcePath + "_output_" + pageIndex + ".jpeg", saveOptions);
                pageIndex++;
           }
      } catch (Exception e) {
           e.printStackTrace();
      }
 }

Generate images of all pictures and charts present in .doc/.docx file

 /**
  * Saves every renderer returned by {@code getAllPictures} for the document at
  * {@code sourcePath} as its own JPEG file.
  *
  * <p>The renderer at position {@code i} is written to
  * {@code sourcePath + "_output_" + i + ".jpeg"}. When {@code getAllPictures} returns
  * {@code null}, nothing is written. Any exception is caught and its stack trace printed.
  *
  * @param sourcePath path of the document to load; also used as the prefix of every output file
  */
 public void generatePicturesAsImages(final String sourcePath) {
      try {
           final Document document = new Document(sourcePath);

           final ImageSaveOptions saveOptions = new ImageSaveOptions(SaveFormat.JPEG);
           saveOptions.setJpegQuality(100);
           saveOptions.setResolution(100);
           saveOptions.setUseHighQualityRendering(true);

           final List<ShapeRenderer> renderers = getAllPictures(document);
           if (renderers == null) {
                return;
           }

           int position = 0;
           for (ShapeRenderer renderer : renderers) {
                renderer.save(sourcePath + "_output_" + position + ".jpeg", saveOptions);
                position++;
           }
      } catch (Exception e) {
           e.printStackTrace();
      }
 }
 /**
  * Collects a {@code ShapeRenderer} for every {@code DRAWING_ML} node found anywhere in the
  * given document (the child-node lookup is performed deep).
  *
  * @param document the document to search
  * @return the renderers in the order the nodes are iterated, or {@code null} when the
  *         document contains no such node
  * @throws Exception propagated from the underlying library calls
  */
 private List<ShapeRenderer> getAllPictures(final Document document) throws Exception {
      @SuppressWarnings("unchecked")
      final NodeCollection<DrawingML> drawings = document.getChildNodes(NodeType.DRAWING_ML, true);

      // Callers distinguish "nothing found" by a null result, so no empty list is created here.
      if (drawings.getCount() <= 0) {
           return null;
      }

      final List<ShapeRenderer> renderers = new ArrayList<ShapeRenderer>();
      for (DrawingML drawing : drawings) {
           renderers.add(drawing.getShapeRenderer());
      }
      return renderers;
 }


Generate images of all tables present in .doc/.docx file


 /**
  * Saves every table returned by {@code getAllTables} for the document at {@code sourcePath}
  * as its own PNG file, rendered on a transparent paper color.
  *
  * <p>The table at position {@code i} is written to
  * {@code sourcePath + "_output_" + i + ".png"}. When {@code getAllTables} returns
  * {@code null}, nothing is written. Any exception is caught and its stack trace printed.
  *
  * @param sourcePath path of the document to load; also used as the prefix of every output file
  */
 public void generateTablesAsImages(final String sourcePath) {
      try {
           Document doc = new Document(sourcePath);
           ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG);
           // NOTE(review): JPEG quality presumably has no effect on PNG output; kept for parity
           // with the other generators - confirm against the library documentation.
           options.setJpegQuality(100);
           options.setResolution(100);
           options.setUseHighQualityRendering(true);
           // Transparent paper so the table can later be cropped out of the rendered image.
           options.setPaperColor(new Color(0, 0, 0, 0));
           List<Table> tables = getAllTables(doc);
           if (tables != null) {
                for (int i = 0; i < tables.size(); i++) {
                     Table table = tables.get(i);
                     String imageFilePath = sourcePath + "_output_" + i + ".png";
                     saveTableAsImage(table, options, imageFilePath);
                }
           }
      } catch (Exception e) {
           e.printStackTrace();
      }
 }
 /**
  * Collects every {@code TABLE} node found anywhere in the given document (the child-node
  * lookup is performed deep).
  *
  * @param document the document to search
  * @return the tables in the order the nodes are iterated, or {@code null} when the
  *         document contains no table
  */
 private List<Table> getAllTables(final Document document) {
      @SuppressWarnings("unchecked")
      final NodeCollection<Table> tableNodes = document.getChildNodes(NodeType.TABLE, true);

      // Callers distinguish "nothing found" by a null result, so no empty list is created here.
      if (tableNodes.getCount() <= 0) {
           return null;
      }

      final List<Table> found = new ArrayList<Table>();
      for (Table tableNode : tableNodes) {
           found.add(tableNode);
      }
      return found;
 }
 /**
  * Renders a single table to {@code outputFile} and then overwrites that file with a PNG
  * cropped to the bounding box reported by {@code FindBoundingBoxAroundNode}.
  *
  * @param table        the table to render
  * @param imageOptions save options used for the initial render
  * @param outputFile   path of the image file; written once by the render and again by the crop
  * @throws Exception propagated from the library calls and from image I/O
  */
 private void saveTableAsImage(final Table table, final ImageSaveOptions imageOptions, final String outputFile) throws Exception {
      // Per the original author's note, a bug affects the cache of a cloned node. To avoid it,
      // the whole document is deep-cloned and the matching table is looked up in the clone by
      // its position among the TABLE nodes of the source document.
      final Document clonedDocument = ((Document) table.getDocument()).deepClone();
      final int tablePosition = table.getDocument().getChildNodes(NodeType.TABLE, true).indexOf(table);
      final Node clonedTable = clonedDocument.getChild(NodeType.TABLE, tablePosition, true);
      final Section owningSection = (Section) clonedTable.getAncestor(NodeType.SECTION);

      // A temporary text-box shape holds a copy of the table; rendering the shape yields the
      // rendered content of the table.
      final Shape container = new Shape(clonedDocument, ShapeType.TEXT_BOX);
      setShapeDefaultProperties(container, owningSection);
      container.appendChild(clonedTable.deepClone(true));

      // The shape has to be part of the document tree before it can be rendered.
      owningSection.getBody().getFirstParagraph().appendChild(container);
      container.getShapeRenderer().save(outputFile, imageOptions);

      // Read the render back, crop away the transparent space around the content, and
      // overwrite the same file with the cropped PNG.
      final File imageFile = new File(outputFile);
      final BufferedImage rendered = ImageIO.read(imageFile);
      final Rectangle contentBounds = FindBoundingBoxAroundNode(rendered);
      final BufferedImage cropped = rendered.getSubimage(
                contentBounds.x, contentBounds.y, contentBounds.width, contentBounds.height);
      ImageIO.write(cropped, "png", imageFile);
 }
 /**
  * Prepares a shape for rendering: sizes it to the section's page dimensions, gives it a
  * fully transparent fill, and disables its outline.
  *
  * @param shape   the shape to configure
  * @param section the section whose page setup supplies the width and height
  */
 private void setShapeDefaultProperties(final Shape shape, final Section section) {
      // The content is assumed never to exceed one page, so the page size is used as the bound.
      final double pageWidth = section.getPageSetup().getPageWidth();
      final double pageHeight = section.getPageSetup().getPageHeight();
      shape.setWidth(pageWidth);
      shape.setHeight(pageHeight);

      // Transparent fill so only the contained content contributes visible pixels.
      final Color transparentFill = new Color(0, 0, 0, 0);
      shape.setFillColor(transparentFill);

      // No border line around the shape.
      shape.setStroked(false);
 }
 /**
  * Computes the smallest rectangle that encloses every pixel whose packed ARGB value differs
  * from that of {@code new Color(0, 0, 0, 0)} (fully transparent black).
  *
  * <p>If the image contains no such pixel, the full image bounds
  * {@code (0, 0, width, height)} are returned. Without this guard the untouched sentinel
  * values would overflow in the width/height arithmetic and produce a rectangle lying
  * outside the image, which cannot be used for cropping.
  *
  * @param originalBitmap the image to scan
  * @return the bounding box of all non-transparent pixels, or the full image bounds when
  *         every pixel is transparent
  */
 private Rectangle FindBoundingBoxAroundNode(final BufferedImage originalBitmap) {
      final int width = originalBitmap.getWidth();
      final int height = originalBitmap.getHeight();
      // Loop-invariant marker for an "empty" pixel; computed once instead of once per pixel.
      final int transparent = new Color(0, 0, 0, 0).getRGB();

      int minX = Integer.MAX_VALUE;
      int minY = Integer.MAX_VALUE;
      int maxX = Integer.MIN_VALUE;
      int maxY = Integer.MIN_VALUE;
      for (int x = 0; x < width; ++x) {
           for (int y = 0; y < height; ++y) {
                if (originalBitmap.getRGB(x, y) != transparent) {
                     minX = Math.min(x, minX);
                     minY = Math.min(y, minY);
                     maxX = Math.max(x, maxX);
                     maxY = Math.max(y, maxY);
                }
           }
      }

      // No content pixel was seen: the sentinels are untouched, so fall back to the whole image.
      if (maxX < minX) {
           return new Rectangle(0, 0, width, height);
      }
      return new Rectangle(minX, minY, maxX - minX + 1, maxY - minY + 1);
 }



Help4J - Jar Search Engine. Easiest way to find jar and its source.

profile for Ashish Aggarwal on Stack Exchange, a network of free, community-driven Q&A sites

4 comments:

  1. Hi ,
    how to reverse the same convert jpeg to doc

    ReplyDelete
  2. Most of them marketed as free converters however after I would use their conversions and save them I bumped into challenges. Some would obtain an ideal PDF doc however in the event you didn't pay for his or her month-to-month subscriptions, they might put watermarks in your PDF. Please make sure to check out this Website

    ReplyDelete