Zipped DOCX XML parse and delete commented block

// Removes a commented block from a .docx file: finds the first <w:tbl> (or other
// configured tag) that contains the start of a given Word comment range, and
// deletes it together with any following siblings up to the one holding the
// matching end of that range. The archive entry word/document.xml is rewritten
// in place.

$filename = 'demo_ms_word.docx';
$dataFile = 'word/document.xml';

// Local name (without the "w:" prefix) of the element type to delete.
$tagToDelete = 'tbl';

// Value of the w:id attribute that identifies the comment range.
$commentId = '2';

// Only .docx archives are supported.
if (pathinfo($filename, PATHINFO_EXTENSION) !== 'docx') {
    die('Wrong File');
}

// Create a new ZIP archive object.
$zip = new ZipArchive();

// Open the archive file.
if ($zip->open($filename) === true) {
    // Locate the main document part and read it into a string.
    $index = $zip->locateName($dataFile);
    $text = $index !== false ? $zip->getFromIndex($index) : false;

    $document = new DOMDocument();

    // loadXML() rejects an empty string, so guard against it explicitly.
    if ($text !== false && $text !== '' && $document->loadXML($text)) {
        // NOTE(review): the "w" prefix is expected to resolve through the
        // namespace declarations carried by the document itself — confirm
        // for documents produced by unusual tools.
        $xpath = new DOMXPath($document);

        $startQuery = '//w:' . $tagToDelete
            . '[descendant::w:commentRangeStart[@w:id="' . $commentId . '"]]';
        $endQuery = 'descendant-or-self::w:commentRangeEnd[@w:id="' . $commentId . '"]';

        $matches = $xpath->query($startQuery);
        $node = $matches !== false ? $matches->item(0) : null;

        // Nothing to do when the comment range is not present in the document.
        if ($node !== null) {
            $removeElements = [$node];

            // Does the range end inside the same element it started in?
            // A DOMNodeList is always truthy, so its length must be checked.
            $ends = $xpath->query($endQuery, $node);
            $endsInSameNode = $ends !== false && $ends->length > 0;

            if (!$endsInSameNode) {
                // Walk the following siblings until one contains (or is) the
                // matching range end.
                $trailing = [];
                $rangeClosed = false;

                for ($sibling = $node->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
                    $trailing[] = $sibling;

                    // Text and other non-element nodes cannot contain the
                    // marker; they are still removed as part of the block.
                    if ($sibling instanceof DOMElement) {
                        $ends = $xpath->query($endQuery, $sibling);
                        if ($ends !== false && $ends->length > 0) {
                            $rangeClosed = true;
                            break;
                        }
                    }
                }

                // Only drop the trailing siblings when the end marker was
                // really found; otherwise an unterminated range would wipe
                // everything up to the end of the parent element.
                if ($rangeClosed) {
                    $removeElements = array_merge($removeElements, $trailing);
                }
            }

            // Delete all collected elements from the document.
            foreach ($removeElements as $element) {
                if ($element->parentNode !== null) {
                    $element->parentNode->removeChild($element);
                }
            }

            $finalXml = $document->saveXML();

            if ($finalXml !== false) {
                // Replace the original entry with the modified XML.
                $zip->deleteName($dataFile);
                $zip->addFromString($dataFile, $finalXml);
            }
        }
    }

    // Close the archive file (this is what actually writes the changes).
    $zip->close();
}

Sourabh Jain

Web developer working with PHP, WordPress, Joomla, CodeIgniter, Ajax, and jQuery

Zipped DOCX XML parse and delete commented block

Leave a comment Cancel reply

Rate this:

Share this:

Related

Leave a comment Cancel reply