Tuesday, 23 December 2014

How to extract text from .doc, .docx, .xlsx, .pptx and .txt files using jQuery AJAX and a PHP script

// Create a PHP file (upload_files.php, the script the jQuery code below posts to)

<?php
            // Upload endpoint: receives one file in the "file" form field, stores it
            // temporarily under uploads/, extracts its text with convertToText(),
            // deletes the stored copy, then saves the text in the session and echoes it.
            session_start(); // start php session

            $text   = '';
            $upload = isset($_FILES['file']) ? $_FILES['file'] : null;

            if (!is_array($upload) || !isset($upload['error']) || is_array($upload['error'])) {
                // Nothing (or a multi-file array) was posted under the "file" field.
                echo 'Error: no file uploaded<br>';
            } elseif (0 < $upload['error']) {
                // PHP reported an upload error code; there is no file to convert.
                echo 'Error: ' . $upload['error'] . '<br>';
            } else {
                // The file name is client-controlled: basename() drops any directory
                // components so the file cannot be written outside uploads/.
                $safeName = basename($upload['name']);
                $filePath = 'uploads/' . $safeName;

                if ($safeName === '' || !move_uploaded_file($upload['tmp_name'], $filePath)) {
                    echo 'Error: could not store uploaded file<br>';
                } else {
                    $text = convertToText($filePath);
                    // delete file from directory (only the copy stored by this request)
                    if (file_exists($filePath)) {
                        unlink($filePath);
                    }
                }
            }

            // Set Text variable in session
            $_SESSION['text'] = $text;
            echo $text;
       // Extract text from files
        function convertToText($filePath) {
             if(isset($filePath) && !file_exists($filePath)) {
                      return "File Not exists";
             }
        $fileArray = pathinfo($filePath);
        $file_ext  = $fileArray['extension'];       
        if($file_ext == "txt" || $file_ext == "doc" || $file_ext == "docx" || $file_ext == "xlsx" || $file_ext == "pptx")
        {
            if($file_ext == "txt") {
                   return read_txt($filePath);
            }elseif($file_ext == "doc") {
                   return read_doc($filePath);
            } elseif($file_ext == "docx") {
                   return read_docx($filePath);
            } elseif($file_ext == "xlsx") {
                   return xlsx_to_text($filePath);
            }elseif($file_ext == "pptx") {
                   return pptx_to_text($filePath);
            }
        } else {
            return "Invalid File Type";
        }
    }
   
    /*************************Extract text from txt files*****************************/
    function read_txt($filePath){
           $txtText='';
           $fh = fopen($filePath,'r');
            while ($line = fgets($fh)) {
                 $txtText.=$line;
            }
          fclose($fh);
         return $txtText;
    }
   
    /*************************Extract text from doc files*****************************/
    function read_doc($filePath) {
        $fileHandle = fopen($filePath, "r");
        $line = @fread($fileHandle, filesize($filePath));  
        $lines = explode(chr(0x0D),$line);
        $docText = "";
        foreach($lines as $thisline)
          {
            $pos = strpos($thisline, chr(0x00));
            if (($pos !== FALSE)||(strlen($thisline)==0)){
              } else {
                   $docText .= $thisline." ";
              }
          }
         $docText = preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$docText);
        return $docText;
    }
   
    /*************************Extract text from docx files*****************************/
    function read_docx($filePath){
        $striped_content = '';
        $content = '';
        $zip = zip_open($filePath);
        if (!$zip || is_numeric($zip)) return false;
        while ($zip_entry = zip_read($zip)) {
            if (zip_entry_open($zip, $zip_entry) == FALSE) continue;
            if (zip_entry_name($zip_entry) != "word/document.xml") continue;
            $content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
            zip_entry_close($zip_entry);
        }// end while

        zip_close($zip);

        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);
        $striped_content = strip_tags($content);
        return $striped_content;
    }
   
   
    /*******Extract text from excel sheet files****/
    function xlsx_to_text($filePath){
    $xml_filename = "xl/sharedStrings.xml"; //content file name
    $zip_handle = new ZipArchive;
    $output_text = "";
    if(true === $zip_handle->open($filePath)){
        if(($xml_index = $zip_handle->locateName($xml_filename)) !== false){
            $xml_datas = $zip_handle->getFromIndex($xml_index);
            $xml_handle = DOMDocument::loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
            $output_text = strip_tags($xml_handle->saveXML());
        }else{
            $output_text .="";
        }
        $zip_handle->close();
    }else{
    $output_text .="";
    }
    return $output_text;
}

/*****Extract text from power point files*****/
/**
 * Extract the text of every slide in a .pptx presentation.
 *
 * Opens the presentation as a zip archive and reads "ppt/slides/slide1.xml",
 * "ppt/slides/slide2.xml", ... until the next numbered slide is missing. Each
 * slide's serialised XML has its tags stripped and is appended to the result.
 *
 * DOMDocument::loadXML() is an instance method, so a DOMDocument is created
 * per slide rather than calling it statically. The file comes from an upload,
 * so entity substitution and XInclude (LIBXML_NOENT, LIBXML_XINCLUDE) are not
 * enabled and network access is blocked with LIBXML_NONET. A slide that is not
 * parseable XML is skipped instead of aborting the whole extraction.
 *
 * @param string $filePath Path of the .pptx file.
 * @return string The concatenated slide text, or "" when the archive cannot
 *                be opened or contains no slides.
 */
function pptx_to_text($filePath){
    $output_text = "";

    $zip_handle = new ZipArchive;
    if (true !== $zip_handle->open($filePath)) {
        return $output_text;
    }

    // loop through slide files
    for ($slide_number = 1; ; $slide_number++) {
        $xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml");
        if ($xml_index === false) {
            break;
        }

        $xml_datas = $zip_handle->getFromIndex($xml_index);
        if ($xml_datas === false || $xml_datas === '') {
            continue;
        }

        $xml_handle = new DOMDocument();
        if ($xml_handle->loadXML($xml_datas, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
            $output_text .= strip_tags($xml_handle->saveXML());
        }
    }

    $zip_handle->close();

    return $output_text;
}

?>

//jquery code

/******** Upload a File ******/
            $(function(){
            $('#upload_files').on('change', function() {
                var file_data = $('#upload_files').prop('files')[0];
                var form_data = new FormData();                 
                form_data.append('file', file_data)
                // alert(form_data);                            
                $.ajax({
                            url: "<?php echo home_url('/wp-content/plugins/gust').'/upload_files.php'; ?>", // point to server-side PHP script
                            dataType: 'text',  // what to expect back from the PHP script, if anything
                            cache: false,
                            contentType: false,
                            processData: false,
                            data: form_data,                        
                            type: 'post',
                            success: function(php_script_response){
                            if(php_script_response == '' || php_script_response == null){
                            alert('Invalid File Type!');
                            return false;
                            }else{
                            location.reload();
                            }
                            }
                 });
            });
            });
         
//css file

/***********put it in css file***********/
/* Wrapper around the hidden file input; positioned so the input can overlay it. */
.fileUpload {
    position: relative;
    overflow: hidden;
    float: left;
    margin-right: 105px;
    margin-top: 7px;
}

/* Keep the button label white while hovering. */
.fileUpload.btn.btn-primary:hover span {
    color: #fff;
}

/* The real file input: invisible, but stretched over the button so clicks reach it. */
.fileUpload input.upload {
    position: absolute;
    top: 0;
    right: 0;
    margin: 0;
    padding: 0;
    font-size: 20px;
    cursor: pointer;
    opacity: 0;
    filter: alpha(opacity=0); /* legacy IE opacity */
    width: 120px;
}

No comments:

Post a Comment