<?php
/**
 * Collector plugin script.
 *
 * Reads the label array in $LabelArray, optionally post-processes it depending
 * on $LabelArray['PageType'], and prints the result with serialize().
 *
 * NOTE(review): $LabelArray is not defined anywhere in this file; presumably
 * the host application defines it before this script runs - confirm.
 *
 * NOTE(review): this file was recovered from a copy in which backslash escapes
 * had been stripped (e.g. "\s" had become "s"). The regular expressions and
 * the "\r\n" literals below were reconstructed - verify against the original.
 */

error_reporting(E_ERROR | E_WARNING | E_PARSE);

// Remote rewriting endpoint; the text is POSTed in the "wenzhang" field.
$url = 'http://api.xxx.com/api.php?v=1&key=13316479731';

// Marker inserted between title and content so that both can be sent in one
// request and separated again in the response.
$titleContentMarker = '[0x1555]';

$pageType = isset($LabelArray['PageType']) ? $LabelArray['PageType'] : null;

switch ($pageType) {
    case 'List':
        // List page: only $LabelArray['Html'] can be processed here.
        break;

    case 'Pages':
        // Paginated page: only $LabelArray['Html'] can be processed here.
        break;

    case 'Content':
        // Default (content) page: only $LabelArray['Html'] can be processed here.
        break;

    case 'Save':
        // Label values can only be modified at save time.
        if (isset($LabelArray['作者'])) {
            $LabelArray['作者'] .= ' 保存時您可以修改任意標簽的值';
        }

        // Keep the untouched content so it can be restored if the request fails.
        $src_contents = isset($LabelArray['內容']) ? $LabelArray['內容'] : '';
        $src_title    = isset($LabelArray['標題']) ? $LabelArray['標題'] : '';

        // Strip HTML attributes before sending the text to the remote service.
        $LabelArray['內容'] = clean_contents($src_contents);

        $response = curl_request($url, array(
            'wenzhang' => $src_title . ' ' . $titleContentMarker . ' ' . PHP_EOL . $LabelArray['內容'],
        ));

        // Remove the padding around the marker (one space on either side and
        // one trailing line break), applied in this order.
        $response = is_string($response)
            ? str_replace(
                array(
                    $titleContentMarker . ' ',
                    ' ' . $titleContentMarker,
                    $titleContentMarker . PHP_EOL,
                    $titleContentMarker . "\r\n",
                    $titleContentMarker . "\n",
                ),
                $titleContentMarker,
                $response
            )
            : '';

        $parts = explode($titleContentMarker, $response);

        if (count($parts) < 2) {
            // The marker did not come back (curl error text, empty reply, ...):
            // leave the title untouched and restore the original content
            // instead of overwriting both with garbage.
            $LabelArray['內容'] = $src_contents;
        } else {
            $LabelArray['標題'] = $parts[0];
            // A connection error reported inside the content part also falls
            // back to the original content.
            $LabelArray['內容'] = (strpos($parts[1], 'connect to host') !== false)
                ? $src_contents
                : $parts[1];
        }
        break;

    default:
        // Any other page type is passed through unchanged.
}

echo serialize(isset($LabelArray) ? $LabelArray : null);

/**
 * Strips HTML attributes from $contents, keeping only a small whitelist.
 *
 * "src" is kept on every element; "alt" is additionally kept on <img> and
 * "frameborder" on <iframe>.
 *
 * @param mixed $contents HTML fragment; non-string or empty values are returned unchanged.
 * @return mixed The cleaned HTML.
 */
function clean_contents($contents)
{
    $cleaner = new cleanHtml;
    $cleaner->allow = array('src');
    $cleaner->exceptions = array(
        'img'    => array('src', 'alt'),
        'iframe' => array('src', 'frameborder'),
    );

    return $cleaner->strip($contents);
}

/**
 * Performs an HTTP request with cURL.
 *
 * @param string       $url          URL to request.
 * @param array|string $post         POST data; when empty the request is a GET.
 *                                   Arrays are encoded with http_build_query(),
 *                                   strings are sent as given.
 * @param string       $cookie       Cookie header value to send, if any.
 * @param int|bool     $returnCookie When truthy, response headers are requested
 *                                   and the first Set-Cookie value is returned too.
 * @return string|array The response body; or the cURL error message when the
 *                      transfer failed; or, when $returnCookie is truthy,
 *                      array('cookie' => string, 'content' => string).
 */
function curl_request($url, $post = '', $cookie = '', $returnCookie = 0)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');

    // Following redirects is only enabled when neither open_basedir nor
    // safe_mode is active.
    if (ini_get('open_basedir') == '' && strtolower(ini_get('safe_mode')) != 'on') {
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    }

    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    curl_setopt($curl, CURLOPT_REFERER, "http://XXX");

    if ($post) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, is_array($post) ? http_build_query($post) : $post);
    }

    if ($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }

    curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 50);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data  = curl_exec($curl);
    $error = curl_errno($curl) ? curl_error($curl) : null;
    // Close the handle on every path, including the error path.
    curl_close($curl);

    if ($error !== null) {
        return $error;
    }

    if (!$returnCookie) {
        return $data;
    }

    // Headers and body are separated by the first blank line.
    $sections = array_pad(explode("\r\n\r\n", (string) $data, 2), 2, '');
    preg_match_all('/Set-Cookie:([^;]*);/', $sections[0], $matches);

    return array(
        // substr(..., 1) drops the space that follows "Set-Cookie:".
        'cookie'  => isset($matches[1][0]) ? (string) substr($matches[1][0], 1) : '',
        'content' => $sections[1],
    );
}

/**
 * Counts the characters (not bytes) of a UTF-8 string.
 *
 * @param string|null $string Text to measure.
 * @return int Number of characters; 0 for null, an empty string, or input
 *             that PCRE rejects as invalid UTF-8.
 */
function utf8_strlen($string = null)
{
    if ($string === null || $string === '') {
        return 0;
    }

    // With the "u" and "s" modifiers "." matches exactly one code point,
    // line breaks included.
    $count = preg_match_all('/./us', (string) $string, $match);

    return $count === false ? 0 : $count;
}

/**
 * Escapes a string for literal use inside a "/"-delimited regular expression.
 *
 * @param string $str Raw text.
 * @return string Text with regex metacharacters and "/" backslash-escaped.
 */
function reg_escape($str)
{
    return preg_quote($str, '/');
}

/**
 * Strip attribute class: removes attributes from XML/HTML elements.
 *
 * Attributes named in $allow are kept on every element; $exceptions maps an
 * element name to extra attribute names kept on that element only; elements
 * named in $ignore are left untouched.
 *
 * @author David (semlabs.co.uk)
 * @version 0.2.1
 */
class cleanHtml
{
    public $str = '';
    public $allow = array();
    public $exceptions = array();
    public $ignore = array();

    /**
     * Removes every attribute that is not whitelisted.
     *
     * @param mixed $str Markup to clean; non-string or empty values are returned unchanged.
     * @return mixed The cleaned markup.
     */
    public function strip($str)
    {
        $this->str = $str;

        if (is_string($str) && strlen($str) > 0) {
            $res = $this->findElements();
            if (is_string($res)) {
                // No element carries attributes: nothing to do.
                return $res;
            }
            $nodes = $this->findAttributes($res);
            $this->removeAttributes($nodes);
        }

        return $this->str;
    }

    /**
     * Collects the opening tags that have something after the element name.
     *
     * @return array|string List of array('literal', 'name', 'attributes'), or
     *                      the unchanged markup when there is no such tag.
     */
    private function findElements()
    {
        $nodes = array();
        // Group 1: element name (closing tags and "<!...>" are skipped);
        // group 2: everything up to the closing ">".
        preg_match_all('/<([^ !\/>\n]+)([^>]*)>/i', $this->str, $elements);

        foreach ($elements[1] as $index => $elementName) {
            $attributes = $elements[2][$index];
            if (!$attributes) {
                continue;
            }
            if (is_array($this->ignore) && !in_array($elementName, $this->ignore, true)) {
                $nodes[] = array(
                    'literal'    => $elements[0][$index],
                    'name'       => $elementName,
                    'attributes' => $attributes,
                );
            }
        }

        return $nodes ? $nodes : $this->str;
    }

    /**
     * Replaces each node's raw attribute string by a parsed attribute list.
     *
     * @param array $nodes Nodes as returned by findElements().
     * @return array The nodes, with 'attributes' set to a list of
     *               array('literal', 'name', 'value') or null when none parsed.
     */
    private function findAttributes($nodes)
    {
        foreach ($nodes as $index => $node) {
            $parsed = array();
            preg_match_all(
                '/([^ =]+)\s*=\s*["|\']{0,1}([^"\']*)["|\']{0,1}/i',
                $node['attributes'],
                $found
            );

            foreach ($found[1] as $i => $attributeName) {
                $parsed[] = array(
                    'literal' => $found[0][$i],
                    'name'    => $attributeName,
                    'value'   => $found[2][$i],
                );
            }

            $nodes[$index]['attributes'] = $parsed ? $parsed : null;
        }

        return $nodes;
    }

    /**
     * Rewrites every collected tag in $this->str with only its permitted attributes.
     *
     * @param array $nodes Nodes as returned by findAttributes().
     * @return void
     */
    private function removeAttributes($nodes)
    {
        foreach ($nodes as $node) {
            $node_name = $node['name'];
            $new_attributes = '';

            if (is_array($node['attributes'])) {
                foreach ($node['attributes'] as $attribute) {
                    $allowed = is_array($this->allow)
                        && in_array($attribute['name'], $this->allow, true);

                    if ($allowed || $this->isException($node_name, $attribute['name'], $this->exceptions)) {
                        $new_attributes = $this->createAttributes(
                            $new_attributes,
                            $attribute['name'],
                            $attribute['value']
                        );
                    }
                }
            }

            $replacement = $new_attributes ? "<$node_name $new_attributes>" : "<$node_name>";

            // Plain string replacement: with preg_replace() a "$1" or a
            // backslash inside a kept attribute value would be interpreted as
            // a back-reference and silently corrupt the output.
            $this->str = str_replace($node['literal'], $replacement, $this->str);
        }
    }

    /**
     * Tells whether an attribute is explicitly permitted for a given element.
     *
     * @param string $element_name   Element name as written in the markup.
     * @param string $attribute_name Attribute name.
     * @param array  $exceptions     Map of element name => permitted attribute names.
     * @return bool
     */
    private function isException($element_name, $attribute_name, $exceptions)
    {
        return is_array($exceptions)
            && array_key_exists($element_name, $exceptions)
            && in_array($attribute_name, (array) $exceptions[$element_name], true);
    }

    /**
     * Appends name="value" to an attribute string, space-separated.
     *
     * @param string $new_attributes Attribute string built so far.
     * @param string $name           Attribute name.
     * @param string $value          Attribute value.
     * @return string The extended attribute string.
     */
    private function createAttributes($new_attributes, $name, $value)
    {
        if ($new_attributes) {
            $new_attributes .= ' ';
        }

        return $new_attributes . $name . '="' . $value . '"';
    }
}
?>
- 1. 163K網站系統微信支付、支付寶支付,實時原路退款,設置教程(X版)
- 2. 163K網站系統相親自動實名認證-百度AI配置教程
- 3. 藍天采集器圖片本地化的功能,目前貌似沒用?
- 4. 藍天采集器內容頁指定關鍵詞辦法參考
- 5. PbootCMS性能優化研究之網頁響應速度
- 6. 藍天采集器如何卸載刪除操作教程
- 7. 藍天采集器下載圖片絕對路徑修改為相對路徑操作方法
- 8. 藍天采集器自動采集插件在米拓5.3.19下無法采集
- 9. 藍天采集器數據庫問題
- 10. 藍天采集器求助 發布設置中WordPress怎么檢測不到?
- 11. 163K網站系統同一服務器多個網站系統配置自動升級教程
- 12. 藍天采集器內容允許匹配多個元素循環入庫教程
- 13. 藍天采集器wordpress發布到網站數據庫需怎樣設置入庫規則?
- 14. 藍天采集器需要采集的字段,他們的class都同名 怎么辦?
- 15. 藍天采集器采集重復: 被采集的內容頁網址會自動變導致重復
- 16. 藍天采集器開啟自動采集總是莫名其妙自己停止了
- 17. 藍天采集器文章采集示例教程
- 18. Discuz! Q寶塔面板在線安裝操作教程
- 19. 藍天采集器系統單次采集過多數據會卡死
- 20. 【阿里云備案】關于網站主頁下方標明備案編號的通知
-
pbootcms常見標簽調用學習
1、列表頁調用文章內容tags: {pboot:tags scode=*}<a href="tags:link">tags:text</a>{···
-
如何添加阿里云服務器安全組規則
下面是開通阿里云安全組的7070端口教程(此端口用在門戶程序自動升級): 1、登錄阿里云控制臺 https://ecs.console.aliyun.com 2、如下圖點擊進入: 3、如下圖,找到對應服務器實例,點擊進入: 4、如下圖,在更多里···
-
163K網站系統《163K網站系統》···
《163K網站系統》HTTPS介紹、基本教程、代操作套餐服務 一、什么是HTTPS 請參見度娘:https://baike.baidu.com/item/https/285356?fr=aladdin 二、為什么要實現H···
-
藍天采集器希望指定網址允許重復采集或···
有的網址,一直都是一樣的,只是里面的數據會變,比如pubg的新浪微博,這是微博的json數據 https://m.weibo.cn/profile/info?uid=6037906900 這網址一直不會變,但里面的數據就會變,如果可以單獨指···
-
藍天采集器請管理員考慮添加自動登陸功···
每次采集隔斷時間 cookies就失效了 可以搞一個人采集之前自動登陸的功能嗎??