/**
 * Chinese word segmentation via the pullword.com HTTP API.
 *
 * The text is sent to the API in consecutive chunks of at most 500 characters
 * (one sequential request per chunk). Chunks are cut at fixed offsets, so a
 * word that straddles a chunk boundary may be split between two requests.
 *
 * Every non-empty line of the API response is treated as one entry. The
 * entries are de-duplicated and returned as array KEYS (each mapped to '*'),
 * so callers can test membership with isset().
 *
 * @param string    $str   Text to segment. Any value that is empty() yields [].
 * @param int|float $model Extraction mode, sent as `param1`
 *                         (0 = all words, 1 = only words pulled with 100% probability).
 * @param int       $debug Debug mode, sent as `param2`
 *                         (0 = off, 1 = on: the API reports the probability of every word;
 *                         the response lines are then used verbatim as keys).
 * @return array Map of segmented word => '*'; an empty array when nothing was returned.
 * @author 617 <email:723875993@qq.com>
 */
function cutwords($str, $model = 0, $debug = 0)
{
    if (empty($str)) {
        return [];
    }

    // Maximum number of characters sent per request.
    $chunk_size = 500;

    // Encode the option values too, so they cannot corrupt the query string.
    $param1 = urlencode((string)$model);
    $param2 = urlencode((string)$debug);

    // Performs one API request and returns the non-empty response lines.
    $fetch = function ($text) use ($param1, $param2) {
        $source = urlencode($text);
        $response = http_get("http://api.pullword.com/get.php?source=$source&param1=$param1&param2=$param2");
        if (!is_string($response) || $response === '') {
            // Failed or empty request: contribute no words instead of exploding a non-string.
            return [];
        }

        return array_filter(explode("\r\n", $response));
    };

    $words = [];
    $length = mb_strlen($str);
    // Walk the text in contiguous chunks; each offset starts exactly where the
    // previous chunk ended, so no characters are skipped between requests.
    for ($offset = 0; $offset < $length; $offset += $chunk_size) {
        $chunk_words = $fetch(mb_substr($str, $offset, $chunk_size));
        // Later chunks are placed in front of earlier ones (historical ordering of the result).
        $words = array_merge($chunk_words, $words);
    }

    if (empty($words)) {
        return [];
    }

    // Turn the unique words into array keys so lookups can use isset().
    return array_fill_keys(array_unique($words), '*');
}

// The code below is application-specific business logic. checkText() is declared
// `public` and uses `$this`, so it belongs inside a class that is not shown here.

/**
 * Checks a form field's value against the warning keywords and records every hit.
 *
 * The field value is segmented with cutwords(); each warning keyword (loaded
 * from the cache key env('redisKey.WarningKeyWords')) that appears among the
 * segmented words produces a hit. When there is at least one hit, a serialized
 * record is stored in the per-user list under
 * env('redisKey.UserKeyWords') . $uid. An existing record with the same
 * user_form_field_id is replaced; otherwise the record is appended.
 *
 * Nothing is written when there are no keywords or no hits.
 *
 * @param array $field              Form field; reads 'field_name' and 'field_value'.
 * @param mixed $uid                User id, appended to the cache key and stored in the record.
 * @param mixed $user_form_id       Form id (stored cast to int).
 * @param mixed $user_form_field_id Form field id (stored cast to int); identifies the record to replace.
 * @return void
 * @author 617 <email:723875993@qq.com>
 */
public function checkText($field, $uid, $user_form_id, $user_form_field_id)
{
    $redis_key = env('redisKey.UserKeyWords') . $uid;

    // Load the warning keywords; without an iterable list there is nothing to match,
    // so skip the (remote) segmentation call entirely.
    $WarningKeyWords = $this->redisCache->getInfo(env('redisKey.WarningKeyWords'));
    if (!is_array($WarningKeyWords) && !($WarningKeyWords instanceof \Traversable)) {
        return;
    }

    // Segment the field content into a word => '*' map.
    $cut_words = cutwords(isset($field['field_value']) ? $field['field_value'] : '');
    if (empty($cut_words)) {
        return;
    }

    $hit = [];
    foreach ($WarningKeyWords as $item) {
        if (isset($cut_words[$item])) {
            $hit[] = [
                'word' => $item,
                'field_name' => $field['field_name'],
                'field_value' => $field['field_value'],
            ];
        }
    }

    if (empty($hit)) {
        return;
    }

    $data = [
        'uid' => $uid,
        'user_form_id' => (int)$user_form_id,
        'user_form_field_id' => (int)$user_form_field_id,
        'hit' => $hit,
    ];
    $serialized = serialize($data);

    $old_data = $this->redisCache->getInfo($redis_key);
    if (empty($old_data) || !is_array($old_data)) {
        // No usable list stored yet: start a new one.
        $old_data = [$serialized];
    } else {
        // Find the first stored record that belongs to the same form field.
        $index = false;
        foreach ($old_data as $key => $value) {
            // NOTE(review): unserialize() on cached data is only safe while nothing
            // untrusted can write to this key; consider JSON for this payload.
            $de_arr = is_string($value) ? unserialize($value) : false;
            if (
                is_array($de_arr)
                && isset($de_arr['user_form_field_id'])
                && (int)$de_arr['user_form_field_id'] === $data['user_form_field_id']
            ) {
                $index = $key;
                break;
            }
        }

        if ($index !== false) {
            $old_data[$index] = $serialized;
        } else {
            $old_data[] = $serialized;
        }

        // Keep the stored value a plain, sequentially indexed list.
        $old_data = array_values($old_data);
    }

    $this->redisCache->setInfo($redis_key, $old_data);
}