Parsing the XML inside a zipped DOCX file and deleting a commented block

// Removes from a .docx file the block of content that a Word comment is
// attached to. A .docx is a ZIP archive whose main body lives in
// word/document.xml; a comment's extent is marked there by a
// <w:commentRangeStart w:id="N"/> ... <w:commentRangeEnd w:id="N"/> pair.
//
// Steps:
//   1. Open the archive and load word/document.xml into a DOMDocument.
//   2. Find the first <w:$tagToDelete> element that contains the range start.
//   3. If the range end is not inside that same element, also collect the
//      following siblings up to and including the one that holds the end.
//   4. Remove the collected nodes and write the XML back into the archive.

$filename = 'demo_ms_word.docx';

// Which element type to delete (e.g. 'tbl' for a table, 'p' for a paragraph)
// and the id of the comment whose range marks the block.
$tagToDelete = 'tbl';
$commentId = '2';

// WordprocessingML main namespace, bound to the "w" prefix in the queries.
$wordNamespace = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';

// pathinfo() replaces end(explode(...)), which passes a temporary by reference.
$ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
if ($ext !== 'docx') {
    die('Wrong File');
}
$dataFile = 'word/document.xml';

// Create a new ZIP archive object and open the archive file.
$zip = new ZipArchive;
if (true !== $zip->open($filename)) {
    die('Unable to open archive: ' . $filename);
}

// Search for the data file in the archive and read it to a string.
$index = $zip->locateName($dataFile);
$text = ($index !== false) ? $zip->getFromIndex($index) : false;
if ($text === false) {
    $zip->close();
    die('Unable to read ' . $dataFile . ' from ' . $filename);
}

// The XML must be loaded before DOMXPath can find anything in it.
$document = new DOMDocument();
if (!$document->loadXML($text)) {
    $zip->close();
    die('Unable to parse ' . $dataFile);
}

$xpath = new DOMXPath($document);
$xpath->registerNamespace('w', $wordNamespace);

// Locate the first element of the requested type that holds the range start.
$startQuery = '//w:' . $tagToDelete
    . '[descendant::w:commentRangeStart[@w:id="' . $commentId . '"]]';
$startNode = $xpath->query($startQuery)->item(0);

if ($startNode !== null) {
    $remove_elements = array($startNode);

    // Relative query: does a given node contain (or is it) the range end?
    $endQuery = 'descendant-or-self::w:commentRangeEnd[@w:id="' . $commentId . '"]';
    $notfoundOnSameNode = $xpath->query($endQuery, $startNode)->length === 0;

    if ($notfoundOnSameNode) {
        // The commented range spills past the start element: walk the
        // following siblings until the one carrying the range end.
        $trailing = array();
        $endFound = false;
        for ($node = $startNode->nextSibling; $node !== null; $node = $node->nextSibling) {
            $trailing[] = $node;
            // Only element nodes can hold the marker; "#text" nodes are
            // collected for removal but not searched.
            if ($node instanceof DOMElement && $xpath->query($endQuery, $node)->length > 0) {
                $endFound = true;
                break;
            }
        }
        // Without a matching end marker, do not delete the rest of the
        // document; fall back to removing the start element only.
        if ($endFound) {
            $remove_elements = array_merge($remove_elements, $trailing);
        }
    }

    // Delete all collected elements from the document.
    foreach ($remove_elements as $element) {
        if ($element->parentNode !== null) {
            $element->parentNode->removeChild($element);
        }
    }

    // Write the new XML over the existing entry.
    $final_xml = $document->saveXML();
    $zip->addFromString($dataFile, $final_xml);
}

// Close the archive file; pending changes are only flushed to disk here.
$zip->close();


Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s