mirror of
https://github.com/mariusgreuel/avrdude.git
synced 2025-09-28 15:05:27 +00:00
Just to make sure this won't get lost over time, make a copy of
Atmel's publicly available documentation files for reference. git-svn-id: svn://svn.savannah.nongnu.org/avrdude/trunk/avrdude@1361 81a1dc3b-b13d-400b-aceb-764788c761c2
This commit is contained in:
234
atmel-docs/EDBG/protocoldocs/search/stemmers/en_stemmer.js
Normal file
234
atmel-docs/EDBG/protocoldocs/search/stemmers/en_stemmer.js
Normal file
@@ -0,0 +1,234 @@
|
||||
// Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
|
||||
// paper, in
|
||||
//
|
||||
// Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
|
||||
// no. 3, pp 130-137,
|
||||
//
|
||||
// see also http://www.tartarus.org/~martin/PorterStemmer
|
||||
|
||||
// Release 1
|
||||
// Derived from (http://tartarus.org/~martin/PorterStemmer/js.txt) - cjm (iizuu) Aug 24, 2009
|
||||
|
||||
/**
 * English Porter stemmer with a handful of post-processing overrides.
 *
 * Usage: stemmer(word) -> stem
 *
 * @param {string} w - The word to stem. The suffix rules only list lowercase
 *     letters, so the caller is expected to pass a lowercased word.
 * @returns {string} The stem. Words shorter than three characters are
 *     returned unchanged.
 */
var stemmer = (function () {
  // Step 2 suffix -> replacement (applied when the remaining stem has m > 0).
  var step2list = {
    "ational" : "ate",
    "tional" : "tion",
    "enci" : "ence",
    "anci" : "ance",
    "izer" : "ize",
    "bli" : "ble",
    "alli" : "al",
    "entli" : "ent",
    "eli" : "e",
    "ousli" : "ous",
    "ization" : "ize",
    "ation" : "ate",
    "ator" : "ate",
    "alism" : "al",
    "iveness" : "ive",
    "fulness" : "ful",
    "ousness" : "ous",
    "aliti" : "al",
    "iviti" : "ive",
    "biliti" : "ble",
    "logi" : "log"
  };

  // Step 3 suffix -> replacement (applied when the remaining stem has m > 0).
  var step3list = {
    "icate" : "ic",
    "ative" : "",
    "alize" : "al",
    "iciti" : "ic",
    "ical" : "ic",
    "ful" : "",
    "ness" : ""
  };

  // Building blocks for the "measure" patterns. Note that `c` treats "y"
  // (and the temporary upper-case "Y", see below) as a consonant.
  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // All patterns are compiled once here instead of on every call. None of
  // them uses the g/y flag, so test()/exec() carry no state between calls.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                     // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");   // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);             // [C]VCVC... is m>1
  var vowelInStem = new RegExp("^(" + C + ")?" + v);                  // vowel in stem
  var cvcEnding = new RegExp("^" + C + v + "[^aeiouwxy]$");
  var doubledConsonant = new RegExp("([^aeiouylsz])\\1$");
  var consonantThenY = new RegExp("^(.+" + c + ")y$");

  var step1aEs = /^(.+?)(ss|i)es$/;
  var step1aS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bRestoreE = /(at|bl|iz)$/;
  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5FinalE = /^(.+?)e$/;
  var step5DoubleL = /ll$/;

  // See http://snowball.tartarus.org/algorithms/english/stemmer.html
  // "Exceptional forms in general": whole words with a fixed stem.
  var specialWords = {
    "skis" : "ski",
    "skies" : "sky",
    "dying" : "die",
    "lying" : "lie",
    "tying" : "tie",
    "idly" : "idl",
    "gently" : "gentl",
    "ugly" : "ugli",
    "early": "earli",
    "only": "onli",
    "singly": "singl"
  };

  // Whole words that must be returned exactly as given.
  var invariantWords = {
    "sky": true, "news": true, "howe": true, "atlas": true, "cosmos": true,
    "bias": true, "andes": true, "inning": true, "outing": true,
    "canning": true, "herring": true, "earring": true, "proceed": true,
    "exceed": true, "succeed": true
  };

  // Word families that the Porter rules collapse too far ("gener-",
  // "commun-"); the listed suffix is appended to tell them apart again.
  // Every pattern is anchored at the end of the original word.
  var overstemmed = [
    { pattern: /.*generate?s?d?(ing)?$/, suffix: "at" },
    { pattern: /.*general(ly)?$/, suffix: "al" },
    { pattern: /.*generic(ally)?$/, suffix: "ic" },
    { pattern: /.*generous(ly)?$/, suffix: "ous" },
    { pattern: /.*communit(ies)?y?$/, suffix: "iti" }
  ];

  // Own-property test, so that words such as "constructor" never resolve to
  // members inherited from Object.prototype.
  var hasOwn = Object.prototype.hasOwnProperty;

  return function (w) {
    var origword = w;
    var firstch;
    var match;
    var stem;
    var i;

    if (w.length < 3) { return w; }

    // A leading "y" is upper-cased for the duration of the rule steps and
    // restored afterwards.
    firstch = w.charAt(0);
    if (firstch === "y") {
      w = "Y" + w.slice(1);
    }

    // Step 1a
    if (step1aEs.test(w)) {
      w = w.replace(step1aEs, "$1$2");
    } else if (step1aS.test(w)) {
      w = w.replace(step1aS, "$1$2");
    }

    // Step 1b
    match = step1bEed.exec(w);
    if (match) {
      if (mgr0.test(match[1])) {
        w = w.slice(0, -1);
      }
    } else {
      match = step1bEdIng.exec(w);
      if (match && vowelInStem.test(match[1])) {
        w = match[1];
        if (step1bRestoreE.test(w)) {
          w = w + "e";
        } else if (doubledConsonant.test(w)) {
          w = w.slice(0, -1);
        } else if (cvcEnding.test(w)) {
          w = w + "e";
        }
      }
    }

    // Step 1c
    match = consonantThenY.exec(w);
    if (match) {
      w = match[1] + "i";
    }

    // Step 2
    match = step2Suffix.exec(w);
    if (match && mgr0.test(match[1])) {
      w = match[1] + step2list[match[2]];
    }

    // Step 3
    match = step3Suffix.exec(w);
    if (match && mgr0.test(match[1])) {
      w = match[1] + step3list[match[2]];
    }

    // Step 4 -- the "(s|t)ion" rule is only tried when no listed suffix matched.
    match = step4Suffix.exec(w);
    if (match) {
      if (mgr1.test(match[1])) {
        w = match[1];
      }
    } else {
      match = step4Ion.exec(w);
      if (match) {
        stem = match[1] + match[2];
        if (mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    match = step5FinalE.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvcEnding.test(stem))) {
        w = stem;
      }
    }
    if (step5DoubleL.test(w) && mgr1.test(w)) {
      w = w.slice(0, -1);
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = "y" + w.slice(1);
    }

    // Whole-word overrides, keyed on the original (unstemmed) word.
    if (hasOwn.call(specialWords, origword)) {
      w = specialWords[origword];
    }
    if (hasOwn.call(invariantWords, origword)) {
      w = origword;
    }

    // Address words overstemmed as gener- / commun-
    for (i = 0; i < overstemmed.length; i++) {
      if (overstemmed[i].pattern.test(origword)) {
        w = w + overstemmed[i].suffix;
      }
    }

    return w;
  };
})();
|
Reference in New Issue
Block a user