こういうテキストがあって
"take your time and take it easy."
これを単語単位に分割してユニークにし、重複している単語は出現回数をカウントアップして
以下のような3パターンのいずれかの形式で結果を得たい
- get_count_text1
- [[and, easy, it, take, time, your], [1.0, 1.0, 1.0, 2.0, 1.0, 1.0]]
- get_count_text2
- [[and, 1.0], [easy, 1.0], [it, 1.0], [take, 2.0], [time, 1.0], [your, 1.0]]
- get_count_text3
- [{count=1.0, text=and}, {count=1.0, text=easy}, {count=1.0, text=it}, {count=2.0, text=take}, {count=1.0, text=time}, {count=1.0, text=your}]
このコードで分割する単位
漢字、ひらがな、カタカナ、半角カタカナ、半角英数、全角英数
それを実現する正規表現
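/[々〆〇〻\u3400-\u9FFF\uF900-\uFAFF]+|[ぁ-ゟー]+|[゠-ヿ]+|[\uFF66-\uFF9F]+|[a-zA-Z0-9]+|[\uFF21-\uFF3A\uFF41-\uFF5A\uFF10-\uFF19]+/g
(漢字の範囲、半角カタカナ、全角英数は、文字幅の変換で壊れないように \u エスケープで表記している)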
コード.gs
/**
 * Splits a text into tokens and logs how often each distinct token occurs.
 *
 * A token is a maximal run of characters from a single class: kanji,
 * hiragana, katakana, half-width katakana, half-width alphanumerics or
 * full-width alphanumerics. Every other character (spaces, punctuation, ...)
 * only separates tokens.
 *
 * @param {string} [str] Text to analyse. Any non-string argument (including
 *     no argument at all) falls back to the built-in sample sentence.
 * @return {Array<{text: string, count: number}>} The value that was logged:
 *     the result of get_count_text3 for the extracted tokens.
 */
function split_text(str) {
  if (typeof str !== "string") {
    str = "take your time and take it easy.";
  }
  // All non-ASCII ranges are written as \u escapes so that width or
  // compatibility normalization of this file cannot silently rewrite them.
  //   \u3005-\u3007 \u303B       iteration marks and ideographic zero used with kanji
  //   \u3400-\u9FFF \uF900-\uFAFF  CJK ideographs and compatibility ideographs
  //   \u3041-\u309F \u30FC        hiragana plus the prolonged sound mark
  //   \u30A0-\u30FF              katakana
  //   \uFF66-\uFF9F              half-width katakana
  //   \uFF21-\uFF3A \uFF41-\uFF5A \uFF10-\uFF19  full-width letters and digits
  var pattern = /[\u3005-\u3007\u303B\u3400-\u9FFF\uF900-\uFAFF]+|[\u3041-\u309F\u30FC]+|[\u30A0-\u30FF]+|[\uFF66-\uFF9F]+|[a-zA-Z0-9]+|[\uFF21-\uFF3A\uFF41-\uFF5A\uFF10-\uFF19]+/g;
  // String.prototype.match returns null (not an empty array) when nothing matches.
  var result = str.match(pattern) || [];
  var count_text = get_count_text3(result);
  Logger.log(count_text);
  return count_text;
}

/**
 * Shared counting routine used by the three public get_count_text* functions.
 * Works on a sorted copy so the caller's array is left untouched. The trailing
 * underscore follows the Apps Script naming convention for private functions.
 *
 * @param {Array} array Values to count; null or undefined counts as empty.
 * @return {Array} A two-element array [values, counts]: the distinct values
 *     in default Array.prototype.sort order, and the number of occurrences of
 *     each value at the same index.
 */
function count_tokens_(array) {
  var sorted = array ? array.slice().sort() : [];
  var word = [];
  var count = [];
  for (var i = 0; i < sorted.length; i++) {
    // After sorting, equal values are adjacent, so comparing with the
    // previous element is enough to detect a repeat.
    if (i > 0 && sorted[i] === sorted[i - 1]) {
      count[count.length - 1] += 1;
    } else {
      word.push(sorted[i]);
      count.push(1);
    }
  }
  return [word, count];
}

/**
 * Counts duplicates and returns two parallel arrays.
 *
 * @param {Array} array Values to count (not modified).
 * @return {Array} [values, counts], e.g. [["and", "take"], [1, 2]].
 */
function get_count_text1(array) {
  return count_tokens_(array);
}

/**
 * Counts duplicates and returns one [value, count] pair per distinct value.
 *
 * @param {Array} array Values to count (not modified).
 * @return {Array<Array>} e.g. [["and", 1], ["take", 2]].
 */
function get_count_text2(array) {
  var counted = count_tokens_(array);
  var result = [];
  for (var i = 0; i < counted[0].length; i++) {
    result.push([counted[0][i], counted[1][i]]);
  }
  return result;
}

/**
 * Counts duplicates and returns one {text, count} object per distinct value.
 *
 * @param {Array} array Values to count (not modified).
 * @return {Array<{text: *, count: number}>} e.g.
 *     [{text: "and", count: 1}, {text: "take", count: 2}].
 */
function get_count_text3(array) {
  var counted = count_tokens_(array);
  var arrays = [];
  for (var i = 0; i < counted[0].length; i++) {
    var obj = {};
    obj["text"] = counted[0][i];
    obj["count"] = counted[1][i];
    arrays.push(obj);
  }
  return arrays;
}